Update src/synonyms_preprocess.py

src/synonyms_preprocess.py  CHANGED  (+12 -8)
@@ -28,16 +28,20 @@ def find_synonyms(word, model, dict_embedding, dict_2000_tokens): #cluster_to_wo
     """
     This function finds the most similar word in the same cluster, and excludes antonyms
     """
-    antonyms = find_antonyms(word)
-    dict_2000_tokens_less_antonyms = [token for token in dict_2000_tokens if token not in antonyms]
 
-    word_embedding = model(word)
+    if word in dict_2000_tokens:
+        return word
+    else:
+        antonyms = find_antonyms(word)
+        dict_2000_tokens_less_antonyms = [token for token in dict_2000_tokens if token not in antonyms]
 
-    similarities=[]
+        word_embedding = model(word)
+
+        similarities=[]
 
-    for token in dict_2000_tokens_less_antonyms:
-        similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))
+        for token in dict_2000_tokens_less_antonyms:
+            similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))
 
-    most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]
+        most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]
 
-    return most_similar_token
+        return most_similar_token
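
For context, a minimal usage sketch of the updated find_synonyms follows. It assumes the module also defines find_antonyms, and that model and dict_embedding are a spaCy pipeline and a token-to-Doc lookup (as the .similarity() calls in the diff suggest); the vocabulary below is illustrative, not the real 2000-token dictionary.

# Minimal usage sketch; spaCy model and tiny vocabulary are assumptions for illustration.
import spacy
from src.synonyms_preprocess import find_synonyms  # module path taken from the diff header

nlp = spacy.load("en_core_web_md")                  # any spaCy model that ships word vectors
dict_2000_tokens = ["happy", "sad", "car"]          # stand-in for the real 2000-token dictionary
dict_embedding = {t: nlp(t) for t in dict_2000_tokens}

# New early-return branch: a word already in the dictionary comes back unchanged.
print(find_synonyms("happy", nlp, dict_embedding, dict_2000_tokens))   # -> "happy"

# Otherwise the most similar non-antonym dictionary token is returned.
print(find_synonyms("joyful", nlp, dict_embedding, dict_2000_tokens))  # -> e.g. "happy"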