Spaces:

ladapetrushenko
/

construction_calculator

Running

App Files Files Community

construction_calculator / app.py

ladapetrushenko

Update app.py

97557fd verified 4 months ago

raw

history blame contribute delete

3.93 kB

	import streamlit as st
	from construction_prediction.constants import load_w2v
	from construction_prediction.construction_calculator import get_collocates_for_word_type

	# Page title plus a single form gathering every input for one collocate query.
	st.title('Construction Calculator')
	form = st.form('Form')

	# Choices offered by the two select boxes below.
	pos_options = ['ADJ', 'NOUN']
	model_options = [
	    'MODEL 1: nplus',
	    'MODEL 2: fontanka',
	    'MODEL 3: librusec',
	    'MODEL 4: stihi_ru',
	]

	# Target word: free text, label hidden, placeholder shown instead.
	target_word = form.text_input(
	    label='Input the target word:',
	    label_visibility='collapsed',
	    placeholder='Input the target word',
	)

	# Part of speech: starts unselected (index=None) so the user must pick one.
	target_word_pos = form.selectbox(
	    label='Specify the part of speech for the target word:',
	    label_visibility='collapsed',
	    placeholder='Specify the part of speech for the target word',
	    options=pos_options,
	    index=None,
	)

	# Collocate selection model: also starts unselected.
	current_model = form.selectbox(
	    label='MODEL',
	    label_visibility='collapsed',
	    placeholder='Choose a collocate selection model',
	    options=model_options,
	    index=None,
	)

	# Optional vocabulary restriction, collected as raw text (empty by default).
	restrict_vocab = form.text_area(
	    label='Restrict vocab',
	    label_visibility='collapsed',
	    placeholder='Restrict vocab',
	    value='',
	)

	# Number of collocates to show: integer >= 1, default 10.
	# Unlike the widgets above, this one does not collapse its label.
	collocate_number = form.number_input(
	    label='The number of collocates in the output:',
	    placeholder='The number of collocates in the output',
	    format='%i',
	    value=10,
	    min_value=1,
	    step=1,
	)

	form_button = form.form_submit_button('Run')

	# Runs only on the script pass triggered by pressing "Run": validates the
	# inputs, loads the chosen model, and prints the collocates for the word.
	if form_button:
	    # Normalise the word once so that stray whitespace neither passes the
	    # emptiness check nor causes a false "not present in the model" miss.
	    target_word = target_word.strip()

	    if not target_word:
	        st.error("You didn't input the target word")
	        st.stop()
	    if not target_word_pos:
	        st.error("You didn't specify the part of speech for the target word")
	        st.stop()
	    if not current_model:
	        st.error("You didn't choose the model for the collocate selection")
	        st.stop()

	    # Select-box label -> path of the word2vec binary it stands for.
	    model_paths = {
	        'MODEL 1: nplus': 'models/nplus1_word2vec.bin',
	        'MODEL 2: fontanka': 'models/fontanka_word2vec.bin',
	        'MODEL 3: librusec': 'models/librusec_word2vec.bin',
	        'MODEL 4: stihi_ru': 'models/stihi_ru_word2vec.bin',
	    }
	    model_path = model_paths.get(current_model)
	    if model_path is None:
	        # An unrecognised label is reported instead of silently falling back
	        # to an arbitrary model.
	        st.error("Unknown collocate selection model")
	        st.stop()

	    # NOTE(review): the model is loaded on every submit; whether load_w2v
	    # caches its result is not visible from this file -- confirm.
	    model = load_w2v(model_path)

	    # Vocabulary entries are looked up as "<word>_<POS>".
	    # NOTE(review): assumes load_w2v returns gensim KeyedVectors (the object
	    # already exposes index_to_key); key_to_index is its dict counterpart,
	    # so the membership test no longer scans the whole vocabulary list.
	    model_key = f'{target_word}_{target_word_pos}'
	    if model_key not in model.key_to_index:
	        st.error("The word you entered is not present in the model")
	        st.stop()

	    # The restriction is optional: an empty field means "no restriction".
	    # A non-empty value that is not an integer is still ignored (as before),
	    # but the user is now told so instead of it being dropped silently.
	    vocab_text = restrict_vocab.strip()
	    restrict_vocab = None
	    if vocab_text:
	        try:
	            restrict_vocab = int(vocab_text)
	        except ValueError:
	            st.warning("'Restrict vocab' must be a whole number; the restriction was ignored")

	    output = get_collocates_for_word_type(
	        model=model,
	        word=target_word,
	        target_pos=target_word_pos,
	        topn=collocate_number,
	        restrict_vocab=restrict_vocab,
	    )

	    st.write(output)

	# Footer: citation of the related article, rendered as raw HTML so the
	# justified layout, bold lead-in and hyperlink are kept.
	related_article_html = '''<div style="text-align: justify;"><b>Related article:</b> Petrushenko L., Mitrofanova O. Predicting Style-Dependent Collocations in Russian Text Corpora. //
	The 17th Workshop on Recent Advances in Slavonic Natural Languages Processing, RASLAN 2023, Kouty nad Desnou, Czech Republic, December 8-10, 2023. –
	pp. 79–89. – URL: <a href="http://nlp.fi.muni.cz/raslan/2023/paper13.pdf">http://nlp.fi.muni.cz/raslan/2023/paper13.pdf</a></div>'''
	st.write(related_article_html, unsafe_allow_html=True)