Figea committed on
Commit
c55ac1a
·
verified ·
1 Parent(s): 0fa0230

Update src/synonyms_preprocess.py

Browse files
Files changed (1) hide show
  1. src/synonyms_preprocess.py +12 -8
src/synonyms_preprocess.py CHANGED
@@ -28,16 +28,20 @@ def find_synonyms(word, model, dict_embedding, dict_2000_tokens): #cluster_to_wo
28
  """
29
  This function finds the most similar word in the same cluster, and excludes antonyms
30
  """
31
- antonyms = find_antonyms(word)
32
- dict_2000_tokens_less_antonyms = [token for token in dict_2000_tokens if token not in antonyms]
33
 
34
- word_embedding = model(word)
 
 
 
 
35
 
36
- similarities=[]
 
 
37
 
38
- for token in dict_2000_tokens_less_antonyms:
39
- similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))
40
 
41
- most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]
42
 
43
- return most_similar_token
 
28
  """
29
  This function finds the most similar word in the same cluster, and excludes antonyms
30
  """
 
 
31
 
32
+ if word in dict_2000_tokens:
33
+ return word
34
+ else:
35
+ antonyms = find_antonyms(word)
36
+ dict_2000_tokens_less_antonyms = [token for token in dict_2000_tokens if token not in antonyms]
37
 
38
+ word_embedding = model(word)
39
+
40
+ similarities=[]
41
 
42
+ for token in dict_2000_tokens_less_antonyms:
43
+ similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))
44
 
45
+ most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]
46
 
47
+ return most_similar_token