Spaces:
Runtime error
Runtime error
adding 2022 model
Browse files
app.py
CHANGED
@@ -7,7 +7,9 @@ description = "Based on TimeLMs which is a RoBERTa model finetuned on tweets at
|
|
7 |
article = "This outputs the top 500 similar tokens to the input word, as a list. Stay tuned for more info"
|
8 |
|
9 |
available_models = ['2019',
|
10 |
-
'2020'
|
|
|
|
|
11 |
|
12 |
model_2019 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
|
13 |
tokenizers_2019 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
|
@@ -32,6 +34,17 @@ knn_model_2020 = NearestNeighbors(n_neighbors=500,
|
|
32 |
nbrs_2020 = knn_model_2020.fit(embedding_matrix_2020)
|
33 |
distances_2020, indices_2020 = nbrs_2020.kneighbors(embedding_matrix_2020)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
title = "How does a word's meaning change with time?"
|
37 |
|
@@ -40,16 +53,26 @@ def topk(word,model):
|
|
40 |
|
41 |
if model == '2019':
|
42 |
index = tokenizers_2019.encode(f'{word}')
|
|
|
43 |
for i in indices_2019[index[1]]:
|
44 |
outs.append(tokenizers_2019.decode(i))
|
45 |
-
print(tokenizers_2019.decode(i))
|
46 |
return outs
|
47 |
|
48 |
if model == '2020':
|
49 |
index = tokenizers_2020.encode(f'{word}')
|
|
|
50 |
for i in indices_2020[index[1]]:
|
51 |
outs.append(tokenizers_2020.decode(i))
|
52 |
-
print(tokenizers_2020.decode(i))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
return outs
|
54 |
|
55 |
# with gr.Blocks() as demo:
|
|
|
7 |
article = "This outputs the top 500 similar tokens to the input word, as a list. Stay tuned for more info"
|
8 |
|
9 |
available_models = ['2019',
|
10 |
+
'2020',
|
11 |
+
'2022'
|
12 |
+
]
|
13 |
|
14 |
model_2019 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
|
15 |
tokenizers_2019 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-2019-90m')
|
|
|
34 |
nbrs_2020 = knn_model_2020.fit(embedding_matrix_2020)
|
35 |
distances_2020, indices_2020 = nbrs_2020.kneighbors(embedding_matrix_2020)
|
36 |
|
37 |
+
model_2022 = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-2022-154m')
|
38 |
+
# NOTE(review): the tokenizer comes from the 'jun2020' checkpoint while the
# model above is '2022-154m' -- confirm the two share the same vocabulary,
# since token ids are used to index the 2022 embedding matrix.
tokenizers_2022 = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base-jun2020')
|
39 |
+
embedding_matrix_2022 = model_2022.embeddings.word_embeddings.weight
|
40 |
+
embedding_matrix_2022 = embedding_matrix_2022.detach().numpy()
|
41 |
+
knn_model_2022 = NearestNeighbors(n_neighbors=500,
|
42 |
+
metric='cosine',
|
43 |
+
algorithm='auto',
|
44 |
+
n_jobs=3)
|
45 |
+
nbrs_2022 = knn_model_2022.fit(embedding_matrix_2022)
|
46 |
+
# Query the index fitted on the 2022 embeddings (not the 2020 one) so each
# row of indices_2022 lists neighbours within the 2022 embedding matrix.
distances_2022, indices_2022 = nbrs_2022.kneighbors(embedding_matrix_2022)
|
47 |
+
|
48 |
|
49 |
title = "How does a word's meaning change with time?"
|
50 |
|
|
|
53 |
|
54 |
if model == '2019':
|
55 |
index = tokenizers_2019.encode(f'{word}')
|
56 |
+
print(index)
|
57 |
for i in indices_2019[index[1]]:
|
58 |
outs.append(tokenizers_2019.decode(i))
|
59 |
+
# print(tokenizers_2019.decode(i))
|
60 |
return outs
|
61 |
|
62 |
if model == '2020':
|
63 |
index = tokenizers_2020.encode(f'{word}')
|
64 |
+
print(index)
|
65 |
for i in indices_2020[index[1]]:
|
66 |
outs.append(tokenizers_2020.decode(i))
|
67 |
+
# print(tokenizers_2020.decode(i))
|
68 |
+
return outs
|
69 |
+
|
70 |
+
if model == '2022':
|
71 |
+
index = tokenizers_2022.encode(f'{word}')
|
72 |
+
print(index)
|
73 |
+
for i in indices_2022[index[1]]:
|
74 |
+
outs.append(tokenizers_2022.decode(i))
|
75 |
+
# print(tokenizers_2022.decode(i))
|
76 |
return outs
|
77 |
|
78 |
# with gr.Blocks() as demo:
|