|
import gradio as gr |
|
import gradio.inputs as grinputs |
|
import gradio.outputs as groutputs |
|
|
|
from gensim.models import KeyedVectors |
|
from gensim.parsing import preprocessing |
|
|
|
# Ordered gensim preprocessing pipeline applied to every candidate word in
# clean_words().
#
# Lower-casing runs first: gensim's stopword removal is a plain, case-sensitive
# membership test against a lower-case stopword list, so with lower-casing at
# the end capitalised stopwords ("The", "And", ...) would slip through both
# stopword passes and only then be lower-cased into the output.
filters = [
    preprocessing.lower_to_unicode,
    preprocessing.remove_stopwords,
    preprocessing.strip_tags,
    preprocessing.strip_punctuation,
    preprocessing.strip_numeric,
    preprocessing.strip_multiple_whitespaces,
    preprocessing.strip_non_alphanum,
    preprocessing.strip_short,
    # Second stopword pass, kept from the original pipeline: the stripping
    # steps above can split a token and expose new stopwords.
    preprocessing.remove_stopwords,
]
|
|
|
def parse_text(text):
    """Split a comma- or semicolon-separated string into a list of words.

    All space characters are removed (so "new york" becomes "newyork"),
    semicolons are treated as commas, and empty entries produced by blank
    input or by leading/trailing/doubled separators are dropped so callers
    never receive an empty-string "word".

    Args:
        text: Raw user input, e.g. "king, queen; man".

    Returns:
        A list of non-empty word strings, in input order. Empty when the
        input holds no words.
    """
    normalized = text.replace(" ", "").replace(";", ",")
    # strip() also discards stray tabs/newlines around a word.
    tokens = (token.strip() for token in normalized.split(","))
    return [token for token in tokens if token]
|
|
|
def clean_words(words):
    """Normalise and de-duplicate a sequence of (word, score) pairs.

    Each word is run through the module-level ``filters`` pipeline; only the
    first token that survives preprocessing is kept. A word is skipped when
    it starts or ends with a word that was already accepted (this also
    covers exact repeats), so the earliest entry wins.

    Args:
        words: Iterable of ``(word, score)`` pairs.

    Returns:
        A dict mapping each accepted word to its score rounded to two
        decimals, in acceptance order.
    """
    clean_dict = {}
    for raw_word, score in words:
        tokens = preprocessing.preprocess_string(raw_word, filters=filters)
        if not tokens:
            # Preprocessing removed the word entirely.
            continue
        word = tokens[0]
        overlaps_kept_word = any(
            word.startswith(kept) or word.endswith(kept) for kept in clean_dict
        )
        if not overlaps_kept_word:
            clean_dict[word] = round(score, 2)
    return clean_dict
|
|
|
# Word-vector file in word2vec text format.
# NOTE(review): the name suggests fastText Common Crawl English 300-d
# vectors -- confirm against the deployed asset.
path = "cc.en.300.vec"

# Only the first 100000 vectors of the file are loaded.
m = KeyedVectors.load_word2vec_format(fname=path, limit=100_000)
|
|
|
def on_submit(text, mode):
    """Return words close to (or far from) the user's input words.

    Args:
        text: Comma/semicolon-separated words typed by the user.
        mode: ``'Close'`` for the 50 most similar words; any other value
            selects the "far" branch.

    Returns:
        The cleaned ``word: score`` mapping rendered as a string without the
        surrounding braces, or a short explanatory message when the input is
        empty or contains words missing from the loaded vocabulary.
    """
    print('{} mode'.format(mode))

    # Drop empty tokens here too, so this handler does not rely on
    # parse_text() filtering them.
    positive = [word for word in parse_text(text) if word]
    if not positive:
        return 'Please enter at least one word.'

    # most_similar() raises KeyError for out-of-vocabulary words; report them
    # to the user instead of crashing the request.
    unknown = [word for word in positive if word not in m]
    if unknown:
        return 'Unknown word(s): {}'.format(', '.join(unknown))

    if mode == 'Close':
        words = m.most_similar(positive=positive, topn=50)
    else:
        # "Far" is the tail of the 10000 nearest neighbours, least similar
        # first -- not a search over the entire vocabulary.
        words = m.most_similar(positive=positive, topn=10000)[::-1]
    return str(clean_words(words))[1:-1]
|
|
|
# Gradio UI: a free-text box for the input words plus a Close/Far selector,
# wired to on_submit(); the result is shown in a single text box.
iface = gr.Interface(
    fn=on_submit,
    inputs=[
        grinputs.Textbox(
            placeholder='word1, word2, word3, ...',
            label="Input words (comma-separated). Returns words that are close (or far) from the input words.",
        ),
        grinputs.Radio(['Close', 'Far'], label="Close or Far mode"),
    ],
    outputs=[
        groutputs.Textbox(label='Information'),
    ],
    allow_screenshot=False,
)

# Start the web server as soon as the script is executed.
iface.launch()