luisespinosa
committed on
Commit
·
7456b1b
1
Parent(s):
bdba8b7
Update README.md
Browse files
README.md
CHANGED
@@ -60,6 +60,54 @@ I am so <mask> 😢
|
|
60 |
5) hungry 0.0232
|
61 |
```
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
## Example Feature Extraction
|
64 |
|
65 |
```python
|
|
|
60 |
5) hungry 0.0232
|
61 |
```
|
62 |
|
63 |
+
## Example Tweet Embeddings
|
64 |
+
```python
|
65 |
+
from transformers import AutoTokenizer, AutoModel, TFAutoModel
|
66 |
+
import numpy as np
|
67 |
+
from scipy.spatial.distance import cosine
|
68 |
+
from collections import defaultdict
|
69 |
+
|
70 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
71 |
+
model = AutoModel.from_pretrained(MODEL)
|
72 |
+
|
73 |
+
def get_embedding(text):
|
74 |
+
text = preprocess(text)
|
75 |
+
encoded_input = tokenizer(text, return_tensors='pt')
|
76 |
+
features = model(**encoded_input)
|
77 |
+
features = features[0].detach().cpu().numpy()
|
78 |
+
features_mean = np.mean(features[0], axis=0)
|
79 |
+
return features_mean
|
80 |
+
|
81 |
+
MODEL = "cardiffnlp/twitter-roberta-base"
|
82 |
+
|
83 |
+
query = "The book was awesome"
|
84 |
+
|
85 |
+
tweets = ["I just ordered fried chicken 🐣",
|
86 |
+
"The movie was great",
|
87 |
+
"What time is the next game?",
|
88 |
+
"Just finished reading 'Embeddings in NLP'"]
|
89 |
+
|
90 |
+
d = defaultdict(int)
|
91 |
+
for tweet in tweets:
|
92 |
+
sim = 1-cosine(get_embedding(query),get_embedding(tweet))
|
93 |
+
d[tweet] = sim
|
94 |
+
|
95 |
+
print('Most similar to: ',query)
|
96 |
+
print('----------------------------------------')
|
97 |
+
for idx,x in enumerate(sorted(d.items(), key=lambda x:x[1], reverse=True)):
|
98 |
+
print(idx+1,x[0])
|
99 |
+
```
|
100 |
+
Output:
|
101 |
+
|
102 |
+
```
|
103 |
+
Most similar to: The book was awesome
|
104 |
+
----------------------------------------
|
105 |
+
1 The movie was great
|
106 |
+
2 Just finished reading 'Embeddings in NLP'
|
107 |
+
3 I just ordered fried chicken 🐣
|
108 |
+
4 What time is the next game?
|
109 |
+
```
|
110 |
+
|
111 |
## Example Feature Extraction
|
112 |
|
113 |
```python
|