Update README.md
Browse files
README.md
CHANGED
@@ -34,22 +34,19 @@ import torch
|
|
34 |
|
35 |
from transformers import AutoModel, AutoTokenizer
|
36 |
|
37 |
-
|
38 |
-
|
39 |
### German example
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
46 |
|
47 |
# Tokenize input sentence
|
48 |
inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt", max_length=512)
|
49 |
|
50 |
-
# Set the model to evaluation mode
|
51 |
-
model.eval()
|
52 |
-
|
53 |
# Take tokenized input and pass it through the model
|
54 |
with torch.no_grad():
|
55 |
outputs = model(**inputs)
|
@@ -74,7 +71,6 @@ tensor([[ 5.6306e-02, -2.8375e-01, -4.1495e-02, 7.4393e-02, -3.1552e-01,
|
|
74 |
|
75 |
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
76 |
This model has been trained on news articles only. Hence, it might not perform as well on other text classes.
|
77 |
-
This multilingual model has not been fine-tuned for cross-lingual transfer. It is intended for computing sentence embeddings that can be compared mono-lingually.
|
78 |
|
79 |
## Training Details
|
80 |
|
|
|
34 |
|
35 |
from transformers import AutoModel, AutoTokenizer
|
36 |
|
|
|
|
|
37 |
### German example
|
38 |
|
39 |
+
# Load the SwissBERT sentence-embedding model and its tokenizer
|
40 |
+
model_name="jgrosjean-mathesis/swissbert-for-sentence-embeddings"
|
41 |
+
model = AutoModel.from_pretrained(model_name)
|
42 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
43 |
+
model.set_default_language("de_CH")
|
44 |
+
|
45 |
+
def generate_sentence_embedding(sentence, ):
|
46 |
|
47 |
# Tokenize input sentence
|
48 |
inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt", max_length=512)
|
49 |
|
|
|
|
|
|
|
50 |
# Take tokenized input and pass it through the model
|
51 |
with torch.no_grad():
|
52 |
outputs = model(**inputs)
|
|
|
71 |
|
72 |
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
73 |
This model has been trained on news articles only. Hence, it might not perform as well on other text classes.
|
|
|
74 |
|
75 |
## Training Details
|
76 |
|