Spaces:

pierrefdz
/

semantle

Runtime error

App Files Files Community

semantle / app.py

pierrefdz

Update app.py

f5fc7cb almost 3 years ago

raw

history blame contribute delete

1.84 kB

	import gradio as gr
	import gradio.inputs as grinputs
	import gradio.outputs as groutputs

	from gensim.models import KeyedVectors
	from gensim.parsing import preprocessing

	# Preprocessing functions handed to `preprocessing.preprocess_string` in
	# `clean_words`, listed in the order they are passed.
	# NOTE(review): `remove_stopwords` appears twice (first and second to last);
	# presumably the second pass catches stopwords exposed by the stripping steps
	# in between -- confirm before removing either entry.
	# NOTE(review): `lower_to_unicode` is last, so every earlier filter sees the
	# original casing -- confirm this is intended.
	filters = [
	preprocessing.remove_stopwords,
	preprocessing.strip_tags,
	preprocessing.strip_punctuation,
	preprocessing.strip_numeric,
	preprocessing.strip_multiple_whitespaces,
	preprocessing.strip_non_alphanum,
	preprocessing.strip_short,
	preprocessing.remove_stopwords,
	preprocessing.lower_to_unicode,
	]

	def parse_text(text):
	    """Split a user-supplied word list into individual words.

	    Both "," and ";" are accepted as separators. All whitespace inside an
	    entry is removed, and empty entries (from an empty input, or from
	    doubled or trailing separators) are dropped so they are never looked
	    up as words.

	    Args:
	        text: Raw string such as "word1, word2; word3". None is treated
	            as an empty string.

	    Returns:
	        List of non-empty words in input order; empty if there are none.
	    """
	    pieces = (text or "").replace(";", ",").split(",")
	    # "".join(piece.split()) removes every whitespace character, not only
	    # the plain space, so tabs and newlines cannot end up inside a word.
	    compact = ("".join(piece.split()) for piece in pieces)
	    return [word for word in compact if word]

	def clean_words(words):
	    """Collapse (word, score) pairs into a dict of distinct cleaned words.

	    Each word is run through `preprocessing.preprocess_string` with the
	    module-level `filters`. Pairs whose word yields no token are skipped;
	    otherwise only the first token is considered. A token is dropped when
	    it starts or ends with a word that is already in the result.

	    Args:
	        words: Iterable of (word, score) pairs.

	    Returns:
	        Dict mapping each kept token to its score rounded to 2 decimals,
	        in the order the tokens were accepted.
	    """
	    kept = {}
	    for raw_word, score in words:
	        tokens = preprocessing.preprocess_string(raw_word, filters=filters)
	        if not tokens:
	            continue
	        candidate = tokens[0]
	        # startswith/endswith accept a tuple of alternatives; an empty
	        # tuple never matches, so the first candidate is always kept.
	        accepted = tuple(kept)
	        if candidate.startswith(accepted) or candidate.endswith(accepted):
	            continue
	        kept[candidate] = round(score, 2)
	    return kept

	# Word-vector file passed to the loader. Swap in the commented-out path
	# below to use the "fr" vectors instead of the "en" ones.
	path = "cc.en.300.vec"
	# path = "cc.fr.300.vec"

	# `limit` caps how many vectors are read from the file.
	m = KeyedVectors.load_word2vec_format(path, limit=100_000)

	def on_submit(text, mode):
	    """Gradio callback: list words close to, or far from, the input words.

	    Args:
	        text: Comma- or semicolon-separated words typed by the user.
	        mode: 'Close' queries the 50 most similar words. Any other value
	            queries the 10000 most similar words and reverses them, so
	            the least similar of those come first.

	    Returns:
	        The cleaned {word: score} mapping rendered as a string without
	        the surrounding braces, or a short explanatory message when no
	        word was given or a word is missing from the model vocabulary.
	    """
	    print('{} mode'.format(mode))
	    # Drop empty entries so stray separators are not looked up as words.
	    positive = [word for word in parse_text(text or "") if word]
	    if not positive:
	        return 'Please enter at least one word.'
	    # Report unknown words up front instead of letting most_similar raise
	    # a KeyError that the UI would only show as a generic error.
	    unknown = [word for word in positive if word not in m]
	    if unknown:
	        return 'Not in vocabulary: {}'.format(', '.join(unknown))
	    topn = 50 if mode == 'Close' else 10000
	    words = m.most_similar(positive=positive, topn=topn)
	    if mode != 'Close':
	        # most_similar returns the most similar first; reverse for "far".
	        words = words[::-1]
	    return str(clean_words(words))[1:-1]

	# Gradio UI: a textbox for the input words plus a Close/Far selector, wired
	# to `on_submit`; the result is shown in a single output textbox.
	# The two input labels previously read "coma separated" and
	# "Close of Far mode"; both wrong words are corrected here.
	iface = gr.Interface(
	    fn=on_submit,
	    inputs=[
	        grinputs.Textbox(
	            placeholder='word1, word2, word3, ...',
	            label="Input words (comma separated). Returns words that are close (or far) from the input words.",
	        ),
	        grinputs.Radio(['Close', 'Far'], label="Close or Far mode"),
	    ],
	    outputs=[groutputs.Textbox(label='Information')],
	    allow_screenshot=False,
	)
	iface.launch()