cbpuschmann
committed on
Add SetFit model
- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +333 -0
- config.json +29 -0
- config_sentence_transformers.json +10 -0
- config_setfit.json +8 -0
- model.safetensors +3 -0
- model_head.pkl +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +61 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
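
This pooling config selects plain mean pooling over the 768-dimensional token embeddings (all other modes are off). As a quick orientation, a minimal sketch of what mask-aware mean pooling computes — hypothetical tensors, not any library's internal code:

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # token_embeddings: (batch, seq_len, 768), attention_mask: (batch, seq_len)
    mask = attention_mask.unsqueeze(-1).float()      # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)    # sum over non-padding tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)         # number of real tokens per sentence
    return summed / counts                           # (batch, 768) sentence embeddings
```
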
README.md
ADDED
@@ -0,0 +1,333 @@
---
tags:
- setfit
- sentence-transformers
- text-classification
- generated_from_setfit_trainer
widget:
- text: '"Die jüngste Protestwelle der Klima-Aktivisten hat erneut die Straßen besetzt,
    aber ihre Forderungen bleiben allzu oft in den Worten stecken. Die Notwendigkeit
    von tatsächlichen Lösungen und politischen Kompromissen wird zu oft übersehen."'
- text: 'Chaos in der City! Wieder einmal legen Klima-Aktivisten mit ihren radikalen
    Aktionen den Verkehr lahm und sorgen für Frust bei den Pendlern. Viele fragen
    sich: Geht''s hier wirklich noch ums Klima oder nur um Aufmerksamkeit um jeden
    Preis?'
- text: Klima-Aktivismus-Gruppen wie Fridays for Future und die Letzte Generation
    haben in den vergangenen Jahren durch verschiedene Protestaktionen auf die Dringlichkeit
    des Klimaschutzes aufmerksam gemacht. Während Befürworter die Aktionen als notwendiges
    Mittel zur Sensibilisierung für die Klimakrise betrachten, kritisieren Gegner
    die Störungen des öffentlichen Lebens und fordern konstruktivere Ansätze im Dialog
    mit Politik und Gesellschaft.
- text: Inmitten wachsender Besorgnis über den Klimawandel setzen Klima-Aktivismus-Gruppen
    wie Fridays for Future und die Letzte Generation ein deutliches Zeichen für den
    notwendigen Wandel. Ihre Aktionen erinnern eindringlich daran, dass dringende
    Maßnahmen zum Schutz unserer Umwelt ergriffen werden müssen.
- text: In den letzten Jahren haben Klima-Aktivismus-Gruppen wie Fridays for Future
    und die Letzte Generation durch verschiedene Protestaktionen auf die Dringlichkeit
    des Klimawandels aufmerksam gemacht und eine breite gesellschaftliche Debatte
    angestoßen. Während einige ihre Methoden als notwendiges Mittel zur Beschleunigung
    politischer Maßnahmen sehen, kritisieren andere die Störungen des öffentlichen
    Lebens und fordern konstruktivere Ansätze.
metrics:
- accuracy
pipeline_tag: text-classification
library_name: setfit
inference: true
base_model: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
model-index:
- name: SetFit with sentence-transformers/paraphrase-multilingual-mpnet-base-v2
  results:
  - task:
      type: text-classification
      name: Text Classification
    dataset:
      name: Unknown
      type: unknown
      split: test
    metrics:
    - type: accuracy
      value: 1.0
      name: Accuracy
---

# SetFit with sentence-transformers/paraphrase-multilingual-mpnet-base-v2

This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.

The model has been trained using an efficient few-shot learning technique that involves:

1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
2. Training a classification head with features from the fine-tuned Sentence Transformer.
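
As a rough sketch of what step 2 amounts to (this is not the setfit library's internal code, and the miniature texts and labels below are made up): the fine-tuned body embeds each training text, and a scikit-learn `LogisticRegression` is fitted on those embeddings. The base checkpoint stands in for the fine-tuned body here.

```python
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression

# Made-up miniature training set, one text per label, just to show the mechanics.
train_texts = [
    "In mehreren Städten wurde erneut zu Protestaktionen aufgerufen.",       # neutral
    "Die Blockaden sorgen nur für Frust und lösen kein einziges Problem.",   # opposed
    "Die Proteste erinnern daran, dass dringend gehandelt werden muss.",     # supportive
]
train_labels = ["neutral", "opposed", "supportive"]

# Stand-in for the contrastively fine-tuned body from step 1.
body = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
embeddings = body.encode(train_texts)                     # (3, 768) sentence embeddings
head = LogisticRegression(max_iter=1000).fit(embeddings, train_labels)

# Inference mirrors the same two stages: embed, then classify.
print(head.predict(body.encode(["Neuer Beispieltext über Klimaproteste."])))
```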

## Model Details

### Model Description
- **Model Type:** SetFit
- **Sentence Transformer body:** [sentence-transformers/paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2)
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
- **Maximum Sequence Length:** 128 tokens
- **Number of Classes:** 3 classes
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)

### Model Labels
| Label      | Examples |
|:-----------|:---------|
| supportive | <ul><li>'Inmitten wachsender Besorgnis über den Klimawandel haben Gruppen wie Fridays for Future und die Letzte Generation mit ihrem unermüdlichen Engagement das Bewusstsein für die Dringlichkeit von Umweltmaßnahmen geschärft. Ihre Aktionen, die oft kontroverse Diskussionen auslösen, tragen dazu bei, den Klimaschutz auf der politischen Agenda zu halten und fordern von Entscheidungsträgern verstärkte Anstrengungen ein.'</li><li>'In den vergangenen Jahren haben Klima-Aktivismus-Gruppen wie Fridays for Future und die Letzte Generation erfolgreich das Bewusstsein für die Dringlichkeit des Klimaschutzes geschärft. Ihre Aktionen haben es geschafft, das Thema in den Mittelpunkt der öffentlichen Debatte zu rücken und politischen Druck zu erzeugen, der für notwendige Veränderungen sorgt.'</li><li>'In den Städten und auf dem Lande erhebt sich der Protest gegen das politische Zögern bei der Klimakrise. Unter dem Motto "Jetzt handeln!" drängen Fridays for Future, die Letzte Generation und andere Aktionäre die Regierungen zu umfassenden Maßnahmen an. Ihre Forderungen sind eindeutig: Der Klimawandel muss gestoppt werden, bevor es zu spät ist.'</li></ul> |
| opposed    | <ul><li>'„Forderungen nach radikalen Maßnahmen, um den Klimawandel zu stoppen, sind leider oft von idealistischer Überheblichkeit geprägt. Die jungen Aktivisten mit ihren Blackout-Aktionen und Protestmahnungen geben nur vage Lösungsvorschläge, ohne sich um die realpolitischen Hürden und wirtschaftlichen Folgen zu kümmern.“'</li><li>'"Während die Welt arbeitet, um Lösungen für die tatsächlichen Probleme zu finden, verschwenden junge Aktivisten ihre Zeit mit Straßenblockaden und Schütteln von Plakaten. Ihre Rhetorik ist einstudiert, ihre Wirklichkeitserfahrung jedoch begrenzt."'</li><li>'"Die jüngste Welle von Protesten und Demonstrationszugängen veranschaulicht, dass viele junge Menschen sich tatsächlich besorgt um den Klimawandel zeigen. Allerdings fehlt es ihnen an realistischen Vorschlägen für eine Lösung und an politischer Reife in der Auseinandersetzung mit den wirtschaftlichen und gesellschaftlichen Konsequenzen eines abrupten Strukturwandels."'</li></ul> |
| neutral    | <ul><li>'In zahlreichen Städten haben Klima-Aktivismus-Gruppen wie Fridays for Future und die Letzte Generation erneut zu Protestaktionen aufgerufen, um auf die Dringlichkeit des Klimaschutzes aufmerksam zu machen. Während Befürworter die Aktionen als notwendiges Mittel zur Sensibilisierung der Öffentlichkeit betrachten, kritisieren Gegner die Störungen des Alltags und fordern konstruktivere Ansätze im Dialog mit politischen Entscheidungsträgern.'</li><li>'In den letzten Jahren haben Klima-Aktivismus-Gruppen wie Fridays for Future und die Letzte Generation verstärkt Aufmerksamkeit auf die Dringlichkeit des Klimaschutzes gelenkt, indem sie durch Proteste und Aktionen den öffentlichen Diskurs beeinflussen. Während ihre Methoden teils kontrovers diskutiert werden, unterstreichen sie die wachsende Besorgnis vieler Bürger über die unzureichenden politischen Maßnahmen zur Bekämpfung des Klimawandels.'</li><li>'In mehreren Städten haben Klima-Aktivismus-Gruppen wie Fridays for Future und die Letzte Generation erneut zu Protestaktionen aufgerufen, um auf die Dringlichkeit des Klimaschutzes aufmerksam zu machen. Während Unterstützer die Notwendigkeit dieser Aktionen betonen, um politischen Druck zu erzeugen, kritisieren Gegner die teils radikalen Methoden und die damit verbundenen Störungen des öffentlichen Lebens.'</li></ul> |

## Evaluation

### Metrics
| Label   | Accuracy |
|:--------|:---------|
| **all** | 1.0      |

+
## Uses
|
95 |
+
|
96 |
+
### Direct Use for Inference
|
97 |
+
|
98 |
+
First install the SetFit library:
|
99 |
+
|
100 |
+
```bash
|
101 |
+
pip install setfit
|
102 |
+
```
|
103 |
+
|
104 |
+
Then you can load this model and run inference.
|
105 |
+
|
106 |
+
```python
|
107 |
+
from setfit import SetFitModel
|
108 |
+
|
109 |
+
# Download from the 🤗 Hub
|
110 |
+
model = SetFitModel.from_pretrained("cbpuschmann/klimacoder_protest_v0.1")
|
111 |
+
# Run inference
|
112 |
+
preds = model("Chaos in der City! Wieder einmal legen Klima-Aktivisten mit ihren radikalen Aktionen den Verkehr lahm und sorgen für Frust bei den Pendlern. Viele fragen sich: Geht's hier wirklich noch ums Klima oder nur um Aufmerksamkeit um jeden Preis?")
|
113 |
+
```
|
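
Because the label names are stored with the model (see `config_setfit.json`), `preds` already contains label strings such as `'opposed'`. If class probabilities are wanted instead, `SetFitModel` also exposes `predict_proba`; a small sketch continuing from the snippet above:

```python
# Class probabilities instead of hard labels; `model` is the instance loaded above.
probs = model.predict_proba([
    "Die Demonstrationen haben eine breite Debatte über den Klimaschutz angestoßen."
])
print(model.labels)   # the three label strings stored with the model
print(probs)          # one row with three class probabilities
```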

<!--
### Downstream Use

*List how someone could finetune this model on their own dataset.*
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Set Metrics
| Training set | Min | Median  | Max |
|:-------------|:----|:--------|:----|
| Word count   | 30  | 48.5311 | 73  |

| Label      | Training Sample Count |
|:-----------|:----------------------|
| neutral    | 169                   |
| opposed    | 177                   |
| supportive | 185                   |

### Training Hyperparameters
- batch_size: (32, 32)
- num_epochs: (1, 1)
- max_steps: -1
- sampling_strategy: oversampling
- body_learning_rate: (2e-05, 1e-05)
- head_learning_rate: 0.01
- loss: CosineSimilarityLoss
- distance_metric: cosine_distance
- margin: 0.25
- end_to_end: False
- use_amp: False
- warmup_proportion: 0.1
- l2_weight: 0.01
- seed: 42
- eval_max_steps: -1
- load_best_model_at_end: False

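The hyperparameter names above correspond to fields of setfit's `TrainingArguments`. A hedged sketch of how a comparable run could be set up; the training texts below are placeholders rather than the actual 531-example training set, and options not passed explicitly keep their defaults:

```python
from datasets import Dataset
from setfit import SetFitModel, Trainer, TrainingArguments

# Placeholder dataset; the real training data (169/177/185 examples per label) is not bundled here.
train_dataset = Dataset.from_dict({
    "text": [
        "Die Proteste machen deutlich, wie dringend Klimaschutz ist.",
        "Die Aktionen setzen ein wichtiges Zeichen für den Wandel.",
        "Die Blockaden sorgen nur für Frust und lösen kein Problem.",
        "Den Aktivisten fehlt es an realistischen Vorschlägen.",
        "In mehreren Städten wurde erneut zu Protestaktionen aufgerufen.",
        "Die Aktionen werden in der Öffentlichkeit kontrovers diskutiert.",
    ],
    "label": ["supportive", "supportive", "opposed", "opposed", "neutral", "neutral"],
})

model = SetFitModel.from_pretrained(
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    labels=["neutral", "opposed", "supportive"],
)

args = TrainingArguments(
    batch_size=(32, 32),                # embedding phase / head phase
    num_epochs=(1, 1),
    body_learning_rate=(2e-05, 1e-05),
    head_learning_rate=0.01,
    l2_weight=0.01,
    warmup_proportion=0.1,
    seed=42,
)

trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
trainer.train()
```
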
### Training Results
| Epoch  | Step | Training Loss | Validation Loss |
|:------:|:----:|:-------------:|:---------------:|
| 0.0002 | 1    | 0.2854        | -               |
| 0.0085 | 50   | 0.2769        | -               |
| 0.0170 | 100  | 0.1526        | -               |
| 0.0255 | 150  | 0.0652        | -               |
| 0.0341 | 200  | 0.0195        | -               |
| 0.0426 | 250  | 0.0062        | -               |
| 0.0511 | 300  | 0.0015        | -               |
| 0.0596 | 350  | 0.0007        | -               |
| 0.0681 | 400  | 0.0004        | -               |
| 0.0766 | 450  | 0.0002        | -               |
| 0.0852 | 500  | 0.0002        | -               |
| 0.0937 | 550  | 0.0001        | -               |
| 0.1022 | 600  | 0.0001        | -               |
| 0.1107 | 650  | 0.0001        | -               |
| 0.1192 | 700  | 0.0001        | -               |
| 0.1277 | 750  | 0.0001        | -               |
| 0.1363 | 800  | 0.0           | -               |
| 0.1448 | 850  | 0.0           | -               |
| 0.1533 | 900  | 0.0           | -               |
| 0.1618 | 950  | 0.0           | -               |
| 0.1703 | 1000 | 0.0           | -               |
| 0.1788 | 1050 | 0.0           | -               |
| 0.1874 | 1100 | 0.0           | -               |
| 0.1959 | 1150 | 0.0           | -               |
| 0.2044 | 1200 | 0.0           | -               |
| 0.2129 | 1250 | 0.0           | -               |
| 0.2214 | 1300 | 0.0           | -               |
| 0.2299 | 1350 | 0.0           | -               |
| 0.2385 | 1400 | 0.0           | -               |
| 0.2470 | 1450 | 0.0           | -               |
| 0.2555 | 1500 | 0.0           | -               |
| 0.2640 | 1550 | 0.0           | -               |
| 0.2725 | 1600 | 0.0           | -               |
| 0.2810 | 1650 | 0.0           | -               |
| 0.2896 | 1700 | 0.0           | -               |
| 0.2981 | 1750 | 0.0           | -               |
| 0.3066 | 1800 | 0.0           | -               |
| 0.3151 | 1850 | 0.0           | -               |
| 0.3236 | 1900 | 0.0           | -               |
| 0.3321 | 1950 | 0.0           | -               |
| 0.3407 | 2000 | 0.0           | -               |
| 0.3492 | 2050 | 0.0           | -               |
| 0.3577 | 2100 | 0.0           | -               |
| 0.3662 | 2150 | 0.0           | -               |
| 0.3747 | 2200 | 0.0           | -               |
| 0.3832 | 2250 | 0.0           | -               |
| 0.3918 | 2300 | 0.0           | -               |
| 0.4003 | 2350 | 0.0           | -               |
| 0.4088 | 2400 | 0.0           | -               |
| 0.4173 | 2450 | 0.0           | -               |
| 0.4258 | 2500 | 0.0           | -               |
| 0.4343 | 2550 | 0.0           | -               |
| 0.4429 | 2600 | 0.0           | -               |
| 0.4514 | 2650 | 0.0           | -               |
| 0.4599 | 2700 | 0.0           | -               |
| 0.4684 | 2750 | 0.0           | -               |
| 0.4769 | 2800 | 0.0           | -               |
| 0.4854 | 2850 | 0.0           | -               |
| 0.4940 | 2900 | 0.0           | -               |
| 0.5025 | 2950 | 0.0           | -               |
| 0.5110 | 3000 | 0.0           | -               |
| 0.5195 | 3050 | 0.0           | -               |
| 0.5280 | 3100 | 0.0           | -               |
| 0.5365 | 3150 | 0.0           | -               |
| 0.5451 | 3200 | 0.0           | -               |
| 0.5536 | 3250 | 0.0           | -               |
| 0.5621 | 3300 | 0.0           | -               |
| 0.5706 | 3350 | 0.0           | -               |
| 0.5791 | 3400 | 0.0           | -               |
| 0.5876 | 3450 | 0.0           | -               |
| 0.5962 | 3500 | 0.0           | -               |
| 0.6047 | 3550 | 0.0           | -               |
| 0.6132 | 3600 | 0.0           | -               |
| 0.6217 | 3650 | 0.0           | -               |
| 0.6302 | 3700 | 0.0           | -               |
| 0.6387 | 3750 | 0.0           | -               |
| 0.6472 | 3800 | 0.0           | -               |
| 0.6558 | 3850 | 0.0           | -               |
| 0.6643 | 3900 | 0.0           | -               |
| 0.6728 | 3950 | 0.0           | -               |
| 0.6813 | 4000 | 0.0           | -               |
| 0.6898 | 4050 | 0.0           | -               |
| 0.6983 | 4100 | 0.0           | -               |
| 0.7069 | 4150 | 0.0           | -               |
| 0.7154 | 4200 | 0.0           | -               |
| 0.7239 | 4250 | 0.0           | -               |
| 0.7324 | 4300 | 0.0           | -               |
| 0.7409 | 4350 | 0.0           | -               |
| 0.7494 | 4400 | 0.0           | -               |
| 0.7580 | 4450 | 0.0           | -               |
| 0.7665 | 4500 | 0.0           | -               |
| 0.7750 | 4550 | 0.0           | -               |
| 0.7835 | 4600 | 0.0           | -               |
| 0.7920 | 4650 | 0.0           | -               |
| 0.8005 | 4700 | 0.0           | -               |
| 0.8091 | 4750 | 0.0           | -               |
| 0.8176 | 4800 | 0.0           | -               |
| 0.8261 | 4850 | 0.0           | -               |
| 0.8346 | 4900 | 0.0           | -               |
| 0.8431 | 4950 | 0.0           | -               |
| 0.8516 | 5000 | 0.0           | -               |
| 0.8602 | 5050 | 0.0           | -               |
| 0.8687 | 5100 | 0.0           | -               |
| 0.8772 | 5150 | 0.0           | -               |
| 0.8857 | 5200 | 0.0           | -               |
| 0.8942 | 5250 | 0.0           | -               |
| 0.9027 | 5300 | 0.0           | -               |
| 0.9113 | 5350 | 0.0           | -               |
| 0.9198 | 5400 | 0.0           | -               |
| 0.9283 | 5450 | 0.0           | -               |
| 0.9368 | 5500 | 0.0           | -               |
| 0.9453 | 5550 | 0.0           | -               |
| 0.9538 | 5600 | 0.0           | -               |
| 0.9624 | 5650 | 0.0           | -               |
| 0.9709 | 5700 | 0.0           | -               |
| 0.9794 | 5750 | 0.0           | -               |
| 0.9879 | 5800 | 0.0           | -               |
| 0.9964 | 5850 | 0.0           | -               |

### Framework Versions
- Python: 3.10.12
- SetFit: 1.1.0
- Sentence Transformers: 3.3.1
- Transformers: 4.42.2
- PyTorch: 2.5.1+cu121
- Datasets: 3.2.0
- Tokenizers: 0.19.1

## Citation

### BibTeX
```bibtex
@article{https://doi.org/10.48550/arxiv.2209.11055,
  doi = {10.48550/ARXIV.2209.11055},
  url = {https://arxiv.org/abs/2209.11055},
  author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
  title = {Efficient Few-Shot Learning Without Prompts},
  publisher = {arXiv},
  year = {2022},
  copyright = {Creative Commons Attribution 4.0 International}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,29 @@
{
  "_name_or_path": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.42.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.3.1",
    "transformers": "4.42.2",
    "pytorch": "2.5.1+cu121"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
config_setfit.json
ADDED
@@ -0,0 +1,8 @@
{
  "normalize_embeddings": false,
  "labels": [
    "neutral",
    "opposed",
    "supportive"
  ]
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ccc9d32a6303db5d6d24495776aa29b8c104d065dee9fb19e00d13e534a829ce
size 1112197096
model_head.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:341ea597c19ab5c651bca19ac164528ffda6faf7474153fd684d3c971a0b04cf
size 19423
modules.json
ADDED
@@ -0,0 +1,14 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
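
modules.json describes the embedding pipeline as two chained modules: the XLM-R transformer stored in the repository root, followed by the mean-pooling layer configured in 1_Pooling/. Normally `SentenceTransformer("<repo_id>")` assembles this for you; a sketch of building an equivalent pipeline by hand with sentence-transformers:

```python
from sentence_transformers import SentenceTransformer, models

# Equivalent two-module pipeline: Transformer backbone followed by mean pooling.
word_embedding_model = models.Transformer(
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    max_seq_length=128,
)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 768
    pooling_mode_mean_tokens=True,
)
encoder = SentenceTransformer(modules=[word_embedding_model, pooling_model])
print(encoder.encode(["Ein kurzer Testsatz."]).shape)  # (1, 768)
```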
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 128,
  "do_lower_case": false
}
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
size 5069051
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
size 17082987
tokenizer_config.json
ADDED
@@ -0,0 +1,61 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": "<mask>",
  "max_length": 128,
  "model_max_length": 128,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "XLMRobertaTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
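
The tokenizer settings above (an XLM-R sentencepiece tokenizer with `model_max_length` 128) are what transformers picks up when loading this repository directly; a small sketch, assuming network access to the Hub:

```python
from transformers import AutoTokenizer

# Loads the tokenizer files shipped with this repository.
tok = AutoTokenizer.from_pretrained("cbpuschmann/klimacoder_protest_v0.1")
enc = tok("Ein kurzer Beispielsatz über Klimaproteste.", truncation=True, max_length=128)
print(tok.__class__.__name__, len(enc["input_ids"]))
```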