|
import streamlit as st |
|
from construction_prediction.constants import load_w2v |
|
from construction_prediction.construction_calculator import get_collocates_for_word_type |
|
|
|
st.title('Construction Calculator')

# All query parameters are collected in one form; the values are read further
# down once the "Run" submit button has been pressed.
form = st.form('Form')
with form:
    # Target word. The label is collapsed, so the placeholder is the only
    # hint shown for this field.
    target_word = st.text_input(
        label='Input the target word:',
        placeholder='Input the target word',
        label_visibility='collapsed',
    )

    # Part of speech of the target word; nothing is preselected (index=None).
    target_word_pos = st.selectbox(
        label='Specify the part of speech for the target word:',
        options=['ADJ', 'NOUN'],
        index=None,
        placeholder='Specify the part of speech for the target word',
        label_visibility='collapsed',
    )

    # Model used for the collocate selection; nothing is preselected.
    current_model = st.selectbox(
        label='MODEL',
        options=[
            'MODEL 1: nplus',
            'MODEL 2: fontanka',
            'MODEL 3: librusec',
            'MODEL 4: stihi_ru',
        ],
        index=None,
        placeholder='Choose a collocate selection model',
        label_visibility='collapsed',
    )

    # Optional vocabulary restriction, entered as free text and converted to
    # an integer after the form is submitted.
    restrict_vocab = st.text_area(
        label='Restrict vocab',
        value='',
        placeholder='Restrict vocab',
        label_visibility='collapsed',
    )

    # Number of collocates to output: an integer >= 1, defaulting to 10.
    collocate_number = st.number_input(
        label='The number of collocates in the output:',
        min_value=1,
        step=1,
        value=10,
        format='%i',
        placeholder='The number of collocates in the output',
    )

    form_button = st.form_submit_button('Run')
|
|
|
if form_button:
    # Surrounding whitespace is never meant to be part of the word, so drop it
    # before validating the input and building the vocabulary key.
    target_word = target_word.strip()

    # Required fields: report the first missing one and end this script run.
    if not target_word:
        st.error("You didn't input the target word")
        st.stop()
    if not target_word_pos:
        st.error("You didn't specify the part of speech for the target word")
        st.stop()
    if not current_model:
        st.error("You didn't choose the model for the collocate selection")
        st.stop()

    # Map the option chosen in the form to the corresponding model file.
    model_paths = {
        'MODEL 1: nplus': 'models/nplus1_word2vec.bin',
        'MODEL 2: fontanka': 'models/fontanka_word2vec.bin',
        'MODEL 3: librusec': 'models/librusec_word2vec.bin',
        'MODEL 4: stihi_ru': 'models/stihi_ru_word2vec.bin',
    }
    # Any unrecognised value falls back to the stihi_ru model.
    model = load_w2v(
        model_paths.get(current_model, 'models/stihi_ru_word2vec.bin')
    )

    # Vocabulary keys have the form "<word>_<POS>".
    # NOTE(review): assumes load_w2v returns a gensim 4 KeyedVectors, whose
    # key_to_index mapping gives a constant-time membership test instead of
    # scanning the index_to_key list -- confirm against load_w2v.
    if f'{target_word}_{target_word_pos}' not in model.key_to_index:
        st.error("The word you entered is not present in the model")
        st.stop()

    # The vocabulary restriction is optional: an empty field means "no
    # restriction". A non-empty value that is not an integer is ignored as
    # well, but the user is told about it rather than left guessing.
    vocab_limit_text = restrict_vocab.strip()
    restrict_vocab = None
    if vocab_limit_text:
        try:
            restrict_vocab = int(vocab_limit_text)
        except ValueError:
            st.warning(
                "The vocabulary restriction must be a whole number; "
                "the value you entered was ignored"
            )

    output = get_collocates_for_word_type(
        model=model,
        word=target_word,
        target_pos=target_word_pos,
        topn=collocate_number,
        restrict_vocab=restrict_vocab,
    )

    st.write(output)
|
|
|
# Citation of the related paper, written as raw HTML (hence
# unsafe_allow_html=True) with justified text and a link to the PDF.
# The dashes in the citation were previously mis-encoded and showed up as
# the Greek letter beta; they are restored here as en dashes.
st.write('''<div style="text-align: justify;"><b>Related article:</b> Petrushenko L., Mitrofanova O. Predicting Style-Dependent Collocations in Russian Text Corpora. //
The 17th Workshop on Recent Advances in Slavonic Natural Languages Processing, RASLAN 2023, Kouty nad Desnou, Czech Republic, December 8-10, 2023. –
pp. 79–89. – URL: <a href="http://nlp.fi.muni.cz/raslan/2023/paper13.pdf">http://nlp.fi.muni.cz/raslan/2023/paper13.pdf</a></div>''', unsafe_allow_html=True)