# ladapetrushenko's picture
# Update app.py
# 97557fd verified
import streamlit as st
from construction_prediction.constants import load_w2v
from construction_prediction.construction_calculator import get_collocates_for_word_type
# ---------------------------------------------------------------------------
# Page title and input form.
# Every input widget below is attached to the same form object and is
# submitted together through the single "Run" button at the end.
# ---------------------------------------------------------------------------

# Choices offered by the two select boxes.
POS_CHOICES = ['ADJ', 'NOUN']
MODEL_CHOICES = [
    'MODEL 1: nplus',
    'MODEL 2: fontanka',
    'MODEL 3: librusec',
    'MODEL 4: stihi_ru',
]

st.title('Construction Calculator')

form = st.form('Form')

# Target word: the label is collapsed, so only the placeholder text is shown.
target_word = form.text_input(
    label='Input the target word:',
    placeholder='Input the target word',
    label_visibility='collapsed',
)

# Part of speech of the target word; no option is preselected (index=None).
target_word_pos = form.selectbox(
    label='Specify the part of speech for the target word:',
    options=POS_CHOICES,
    index=None,
    placeholder='Specify the part of speech for the target word',
    label_visibility='collapsed',
)

# Model used for collocate selection; no option is preselected (index=None).
current_model = form.selectbox(
    label='MODEL',
    options=MODEL_CHOICES,
    index=None,
    placeholder='Choose a collocate selection model',
    label_visibility='collapsed',
)

# Optional vocabulary restriction, collected as free text (empty by default).
restrict_vocab = form.text_area(
    label='Restrict vocab',
    value='',
    placeholder='Restrict vocab',
    label_visibility='collapsed',
)

# Number of collocates to return: integer input, at least 1, default 10.
# Unlike the widgets above, this one has no label_visibility override.
collocate_number = form.number_input(
    label='The number of collocates in the output:',
    min_value=1,
    value=10,
    step=1,
    format='%i',
    placeholder='The number of collocates in the output',
)

# Submit button for the whole form.
form_button = form.form_submit_button('Run')
# Handle a form submission: validate the inputs, load the chosen model,
# check that the "<word>_<POS>" key exists in it, and display the collocates.
if form_button:
    # Surrounding whitespace is not part of the word itself; strip it so that
    # whitespace-only input is reported as missing and " word " can be found.
    target_word = (target_word or '').strip()

    # --- Input validation: report the first problem and halt this run. ---
    if not target_word:
        st.error("You didn't input the target word")
        st.stop()
    if not target_word_pos:
        st.error("You didn't specify the part of speech for the target word")
        st.stop()
    if not current_model:
        st.error("You didn't choose the model for the collocate selection")
        st.stop()

    # --- Model selection: select-box label -> word2vec binary path. ---
    model_paths = {
        'MODEL 1: nplus': 'models/nplus1_word2vec.bin',
        'MODEL 2: fontanka': 'models/fontanka_word2vec.bin',
        'MODEL 3: librusec': 'models/librusec_word2vec.bin',
        'MODEL 4: stihi_ru': 'models/stihi_ru_word2vec.bin',
    }
    # Any label not listed above falls back to the stihi_ru model.
    model = load_w2v(model_paths.get(current_model, 'models/stihi_ru_word2vec.bin'))

    # Model keys have the form "<word>_<POS>".
    # NOTE(review): assumes load_w2v returns a gensim KeyedVectors-like object,
    # whose key_to_index dict holds the same keys as the index_to_key list;
    # a dict membership test avoids scanning the whole vocabulary list.
    if '_'.join((target_word, target_word_pos)) not in model.key_to_index:
        st.error("The word you entered is not present in the model")
        st.stop()

    # --- Optional vocabulary restriction. ---
    # Empty input means "no restriction". Non-numeric input is also ignored,
    # but the user is told so instead of it being dropped silently.
    restrict_vocab_text = restrict_vocab.strip()
    try:
        restrict_vocab = int(restrict_vocab_text)
    except ValueError:
        if restrict_vocab_text:
            st.warning("The 'Restrict vocab' value is not a whole number and was ignored")
        restrict_vocab = None

    # --- Compute and display the collocates. ---
    output = get_collocates_for_word_type(
        model=model,
        word=target_word,
        target_pos=target_word_pos,
        topn=collocate_number,
        restrict_vocab=restrict_vocab,
    )
    st.write(output)
# Footer: hard-coded citation of the related article, including a link to the
# paper. The markup is a fixed literal and is rendered as HTML.
related_article_html = '''<div style="text-align: justify;"><b>Related article:</b> Petrushenko L., Mitrofanova O. Predicting Style-Dependent Collocations in Russian Text Corpora. //
The 17th Workshop on Recent Advances in Slavonic Natural Languages Processing, RASLAN 2023, Kouty nad Desnou, Czech Republic, December 8-10, 2023. –
pp. 79–89. – URL: <a href="http://nlp.fi.muni.cz/raslan/2023/paper13.pdf">http://nlp.fi.muni.cz/raslan/2023/paper13.pdf</a></div>'''
st.write(related_article_html, unsafe_allow_html=True)