Ilde committed on
Commit
26166f1
·
1 Parent(s): 7937f0f

first vers files

Browse files
Files changed (4) hide show
  1. app.py +43 -0
  2. requirements.txt +2 -0
  3. smaller_model_spa.txt +3 -0
  4. stop_words.pkl +3 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gensim.models.keyedvectors import KeyedVectors
2
+ import pickle
3
+ from nltk.tokenize import word_tokenize
4
+ import gradio as gr
5
+
6
+
7
# Read the word embeddings with gensim's KeyedVectors loader
# (load_word2vec_format is called with its default, non-binary mode).
wordvectors_file_vec = 'smaller_model_spa.txt'
smaller_model = KeyedVectors.load_word2vec_format(wordvectors_file_vec)

# Restore the pickled stop-word object consulted by filter_words().
# NOTE(review): unpickling can execute arbitrary code, so stop_words.pkl
# must only ever come from a trusted source.
with open('stop_words.pkl', 'rb') as stop_words_file:
    stop_words = pickle.load(stop_words_file)
12
+
13
+
14
def filter_words(x):
    """Tokenize *x* and return the tokens that are not stop words.

    The text is split with ``word_tokenize``; a token is discarded when its
    lowercase form is found in the module-level ``stop_words``. Surviving
    tokens are returned as a list, in their original order and casing.
    """
    kept_tokens = []
    for token in word_tokenize(x):
        if token.lower() in stop_words:
            continue
        kept_tokens.append(token)
    return kept_tokens
18
+
19
def reverse_dictionary(definicion):
    """Return an HTML list of the words most similar to a definition.

    The definition is tokenized and stripped of stop words by
    ``filter_words``; the remaining tokens are used as the positive
    examples of a ``most_similar_cosmul`` query on ``smaller_model`` and
    the resulting (word, score) pairs are rendered by ``list_to_html``.

    Args:
        definicion: Free text describing the word being looked for.

    Returns:
        An HTML string. When no token of the definition is present in the
        model vocabulary, the string contains an empty result list.
    """
    words = filter_words(definicion)

    # most_similar_cosmul raises KeyError for keys missing from the model
    # and ValueError when it receives no keys at all. Tokens such as
    # punctuation or rare words would therefore crash the request, so only
    # in-vocabulary tokens are forwarded.
    known_words = [word for word in words if word in smaller_model]
    if not known_words:
        return list_to_html([])

    list_similar = smaller_model.most_similar_cosmul(positive=known_words)
    return list_to_html(list_similar)
23
+
24
def list_to_html(lst, title="Results"):
    """Render (word, score) pairs as an HTML heading plus a bullet list.

    Args:
        lst: Iterable of ``(word, score)`` pairs; ``score`` must support the
            ``.2f`` format specification.
        title: Text placed in the ``<h3>`` heading above the list.

    Returns:
        A string of the form ``<h3>title</h3><ul><li>...</li></ul>`` with one
        ``<li>`` per pair and the score rounded to two decimals. An empty
        *lst* yields an empty ``<ul></ul>``.
    """
    # Imported locally so this function does not depend on a change to the
    # module's import header.
    from html import escape

    # Escape the interpolated text: a word or title containing '&', '<' or
    # '>' would otherwise be interpreted as markup by the browser.
    items = "".join(
        f"<li><b>{escape(str(word))}</b>: {score:.2f}</li>"
        for word, score in lst
    )
    return f"<h3>{escape(str(title))}</h3><ul>{items}</ul>"
30
+
31
+
32
# Texts handed to the Gradio interface below.
title = "Diccionario inverso en español"
description = "Un diccionario inverso utilizando embeddings Word2Vec de SBWCA y filtrando palabras de Wikcionario. Creado como demo para Gradio y HuggingFace Spaces."
examples = ['angustia porque se te olvido algo']


# Build and start the web UI: a multi-line text box feeds
# reverse_dictionary and its HTML output is rendered as-is.
# The components are taken from the top level of the gradio package because
# the gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and
# removed in later releases.
gr.Interface(
    fn=reverse_dictionary,
    inputs=gr.Textbox(lines=5, placeholder="Enter your text here..."),
    outputs=gr.HTML(),
    title=title,
    description=description,
    examples=examples,
).launch(share=True)
43
+
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gensim
2
+ nltk
smaller_model_spa.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33bfe210640107736d63109016e474d178f7bce196c0bec951a3abad7c83913a
3
+ size 491155883
stop_words.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60132a636523571f9be86856ab0ce292e842cda456d1dfebb81c8997ef156775
3
+ size 2818