semantle / app.py
pierrefdz's picture
Update app.py
f5fc7cb
import gradio as gr
import gradio.inputs as grinputs
import gradio.outputs as groutputs
from gensim.models import KeyedVectors
from gensim.parsing import preprocessing
# Cleaning pipeline applied, in order, to every candidate word by
# ``preprocessing.preprocess_string`` (see ``clean_words``).
#
# Lower-casing runs first because ``remove_stopwords`` compares tokens
# case-sensitively against a lower-case stop list: with lower-casing last,
# capitalised stop words such as "The" slipped through both stop-word passes
# and were shown to the user as "the".
filters = [
    preprocessing.lower_to_unicode,
    preprocessing.remove_stopwords,
    preprocessing.strip_tags,
    preprocessing.strip_punctuation,
    preprocessing.strip_numeric,
    preprocessing.strip_multiple_whitespaces,
    preprocessing.strip_non_alphanum,
    preprocessing.strip_short,
    # Second pass: the stripping steps above can leave behind tokens that are
    # themselves stop words (e.g. "the," -> "the").
    preprocessing.remove_stopwords,
]
def parse_text(text):
    """Split the raw user input into a list of words.

    Commas and semicolons are both accepted as separators. Every whitespace
    character (spaces, tabs, newlines) inside an entry is removed, and entries
    that end up empty -- from empty input, doubled separators or a trailing
    separator -- are dropped so they are never passed on as words.

    Args:
        text: The string typed by the user, e.g. ``"cat, dog;bird"``.

    Returns:
        The list of non-empty words in input order; ``[]`` when the input
        contains no word at all.
    """
    entries = text.replace(";", ",").split(",")
    # ``str.split()`` with no argument splits on any whitespace run, so joining
    # the pieces back removes all whitespace, not only the space character.
    words = ("".join(entry.split()) for entry in entries)
    return [word for word in words if word]
def clean_words(words):
    """Normalise and de-duplicate a list of ``(word, score)`` pairs.

    Each word is run through ``preprocessing.preprocess_string`` with the
    module-level ``filters``; only the first resulting token is used. A token
    is skipped when it starts or ends with any word that was already kept, so
    earlier pairs take precedence over later near-duplicates.

    Args:
        words: Iterable of ``(word, score)`` pairs.

    Returns:
        A dict mapping each kept token to its score rounded to two decimals,
        in the order the tokens were first accepted.
    """
    kept = {}
    for raw_word, score in words:
        tokens = preprocessing.preprocess_string(raw_word, filters=filters)
        if not tokens:
            # The filters removed the word entirely.
            continue
        candidate = tokens[0]
        # startswith/endswith accept a tuple and return False for an empty one,
        # so the very first candidate is always accepted.
        accepted = tuple(kept)
        if candidate.startswith(accepted) or candidate.endswith(accepted):
            continue
        kept[candidate] = round(score, 2)
    return kept
# Word-vector file in word2vec text format, resolved relative to the working
# directory. Presumably the English fastText Common Crawl vectors (judging by
# the file name); use "cc.fr.300.vec" instead for the French ones.
path = "cc.en.300.vec"
# Loaded once at import time; ``limit`` caps how many vectors are read from
# the file.
m = KeyedVectors.load_word2vec_format(path, limit=100_000)
def on_submit(text, mode):
    """Gradio callback: list words close to (or far from) the input words.

    Args:
        text: Raw user input; split into words by ``parse_text``.
        mode: ``'Close'`` returns the 50 most similar words. Any other value
            (including ``'Far'``) takes the 10000 most similar words in
            reverse order, i.e. the least similar of those 10000 first --
            not the globally most distant words of the vocabulary.

    Returns:
        The cleaned ``word: score`` pairs as a string (the ``str`` of the dict
        from ``clean_words`` without its surrounding braces), or a readable
        error message when the lookup fails.
    """
    print(f'{mode} mode')
    positive = parse_text(text)
    try:
        if mode == 'Close':
            words = m.most_similar(positive=positive, topn=50)
        else:
            words = m.most_similar(positive=positive, topn=10000)[::-1]
    except (KeyError, ValueError) as err:
        # gensim raises KeyError for a word missing from the vocabulary and
        # ValueError when no input word is given; show that to the user
        # instead of letting the raw exception reach the UI.
        return f"Could not look up the input words: {err}"
    return str(clean_words(words))[1:-1]
# Web UI: a free-text box for the input words plus a Close/Far selector,
# wired to ``on_submit``; the result is shown in a single text box.
# The legacy ``gradio.inputs`` / ``gradio.outputs`` component API is kept on
# purpose so the app keeps running on the Gradio version it was written for.
iface = gr.Interface(
    fn=on_submit,
    inputs=[
        grinputs.Textbox(
            placeholder='word1, word2, word3, ...',
            label="Input words (comma separated). Returns words that are close (or far) from the input words.",
        ),
        grinputs.Radio(['Close', 'Far'], label="Close or Far mode"),
    ],
    outputs=[
        groutputs.Textbox(label='Information'),
    ],
    allow_screenshot=False,
)
# Start the web server as soon as the script is executed.
iface.launch()