File size: 3,932 Bytes
f2b6412 9e2d1d3 f2b6412 36fbe13 f2b6412 36fbe13 9e2d1d3 f2b6412 36fbe13 f2b6412 36fbe13 f2b6412 36fbe13 f2b6412 36fbe13 f2b6412 36fbe13 f2b6412 36fbe13 f2b6412 36fbe13 f2b6412 36fbe13 4590734 f2b6412 7f1c71a 0ae7dcb 905ca72 4599208 f2b6412 30b7184 4599208 f2b6412 b17a070 97557fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import streamlit as st
from construction_prediction.constants import load_w2v
from construction_prediction.construction_calculator import get_collocates_for_word_type
# Page title followed by the input form. All widgets are grouped in a single
# form that is submitted with the 'Run' button.
st.title('Construction Calculator')

# Choices offered by the two select boxes.
pos_options = ['ADJ', 'NOUN']
model_options = [
    'MODEL 1: nplus',
    'MODEL 2: fontanka',
    'MODEL 3: librusec',
    'MODEL 4: stihi_ru',
]

form = st.form('Form')
with form:
    # Word whose collocates are requested.
    target_word = st.text_input(
        label='Input the target word:',
        placeholder='Input the target word',
        label_visibility='collapsed',
    )

    # Part-of-speech tag of the target word; nothing is preselected.
    target_word_pos = st.selectbox(
        label='Specify the part of speech for the target word:',
        options=pos_options,
        index=None,
        placeholder='Specify the part of speech for the target word',
        label_visibility='collapsed',
    )

    # Model used for the collocate selection; nothing is preselected.
    current_model = st.selectbox(
        label='MODEL',
        options=model_options,
        index=None,
        placeholder='Choose a collocate selection model',
        label_visibility='collapsed',
    )

    # Free-text field; the raw string is interpreted after submission.
    restrict_vocab = st.text_area(
        label='Restrict vocab',
        value='',
        placeholder='Restrict vocab',
        label_visibility='collapsed',
    )

    # label_visibility is not set here, so this is the only widget that does
    # not request a collapsed label.
    collocate_number = st.number_input(
        label='The number of collocates in the output:',
        min_value=1,
        step=1,
        value=10,
        format='%i',
        placeholder='The number of collocates in the output',
    )

    form_button = st.form_submit_button('Run')
# Handle a form submission: validate the inputs, load the selected model,
# and display the collocates computed for the target word.
if form_button:
    # Surrounding whitespace would make the vocabulary lookup below fail, and
    # a whitespace-only entry should count as "no input".
    target_word = (target_word or '').strip()
    if not target_word:
        st.error("You didn't input the target word")
        st.stop()
    if not target_word_pos:
        st.error("You didn't specify the part of speech for the target word")
        st.stop()
    if not current_model:
        st.error("You didn't choose the model for the collocate selection")
        st.stop()

    # Select-box option -> word2vec file. Any unlisted value falls back to
    # the stihi_ru model, as the original if/elif chain did.
    model_paths = {
        'MODEL 1: nplus': 'models/nplus1_word2vec.bin',
        'MODEL 2: fontanka': 'models/fontanka_word2vec.bin',
        'MODEL 3: librusec': 'models/librusec_word2vec.bin',
    }
    model = load_w2v(
        model_paths.get(current_model, 'models/stihi_ru_word2vec.bin')
    )

    # Model keys have the form '<word>_<POS>'.
    # NOTE(review): assumes load_w2v returns a gensim 4 KeyedVectors, where
    # key_to_index is the dict counterpart of the index_to_key list; the dict
    # lookup avoids scanning the whole vocabulary list.
    if f'{target_word}_{target_word_pos}' not in model.key_to_index:
        st.error("The word you entered is not present in the model")
        st.stop()

    # An empty field means "no restriction". A non-integer entry is also
    # treated as no restriction, but the user is told it was ignored.
    vocab_text = restrict_vocab.strip()
    vocab_limit = None
    if vocab_text:
        try:
            vocab_limit = int(vocab_text)
        except ValueError:
            st.warning("The 'Restrict vocab' value is not a whole number "
                       "and was ignored")

    output = get_collocates_for_word_type(
        model=model,
        word=target_word,
        target_pos=target_word_pos,
        topn=collocate_number,
        restrict_vocab=vocab_limit,
    )
    st.write(output)
# Footer with the reference to the related article. The text is raw HTML, so
# it is rendered with unsafe_allow_html=True.
st.write('''<div style="text-align: justify;"><b>Related article:</b> Petrushenko L., Mitrofanova O. Predicting Style-Dependent Collocations in Russian Text Corpora. //
The 17th Workshop on Recent Advances in Slavonic Natural Languages Processing, RASLAN 2023, Kouty nad Desnou, Czech Republic, December 8-10, 2023. –
pp. 79–89. – URL: <a href="http://nlp.fi.muni.cz/raslan/2023/paper13.pdf">http://nlp.fi.muni.cz/raslan/2023/paper13.pdf</a></div>''', unsafe_allow_html=True)