File size: 3,932 Bytes
f2b6412
 
 
 
9e2d1d3
f2b6412
36fbe13
 
f2b6412
 
36fbe13
9e2d1d3
f2b6412
36fbe13
f2b6412
 
 
 
 
 
36fbe13
f2b6412
 
 
 
 
 
 
36fbe13
f2b6412
 
 
 
36fbe13
f2b6412
 
36fbe13
f2b6412
 
 
36fbe13
f2b6412
 
36fbe13
f2b6412
 
36fbe13
4590734
f2b6412
 
 
 
 
 
 
 
 
 
7f1c71a
0ae7dcb
 
905ca72
4599208
 
 
 
 
f2b6412
 
 
 
30b7184
4599208
f2b6412
b17a070
97557fd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import streamlit as st
from construction_prediction.constants import load_w2v
from construction_prediction.construction_calculator import get_collocates_for_word_type

# --- Page header and input form -------------------------------------------
# All inputs live in a single Streamlit form, so their values are only sent
# to the script when the 'Run' button is pressed.
st.title('Construction Calculator')
form = st.form('Form')

# Most widgets hide their label and rely on the placeholder text instead.
collapsed_label = {'label_visibility': 'collapsed'}
pos_choices = ['ADJ', 'NOUN']
model_choices = [
    'MODEL 1: nplus',
    'MODEL 2: fontanka',
    'MODEL 3: librusec',
    'MODEL 4: stihi_ru',
]

# Word whose collocates are requested.
target_word = form.text_input(
    label='Input the target word:',
    placeholder='Input the target word',
    **collapsed_label,
)

# Part-of-speech tag of the target word; nothing is preselected.
target_word_pos = form.selectbox(
    label='Specify the part of speech for the target word:',
    options=pos_choices,
    index=None,
    placeholder='Specify the part of speech for the target word',
    **collapsed_label,
)

# Which word2vec model to query; nothing is preselected.
current_model = form.selectbox(
    label='MODEL',
    options=model_choices,
    index=None,
    placeholder='Choose a collocate selection model',
    **collapsed_label,
)

# Optional free-text field; it is parsed as an integer after submission.
restrict_vocab = form.text_area(
    label='Restrict vocab',
    value='',
    placeholder='Restrict vocab',
    **collapsed_label,
)

# Unlike the widgets above, this one keeps its label visible.
collocate_number = form.number_input(
    label='The number of collocates in the output:',
    min_value=1,
    step=1,
    value=10,
    format='%i',
    placeholder='The number of collocates in the output',
)

form_button = form.form_submit_button('Run')

# --- Form submission handling ---------------------------------------------
# Runs only on the script pass triggered by pressing 'Run': validates the
# inputs, loads the chosen model, and prints the collocates for the word.
if form_button:
    # Surrounding whitespace (e.g. from copy-paste) would otherwise make a
    # valid word look as if it were missing from the model vocabulary.
    word = target_word.strip()

    # Guard clauses: report the first missing input and end this script run.
    if not word:
        st.error("You didn't input the target word")
        st.stop()
    if not target_word_pos:
        st.error("You didn't specify the part of speech for the target word")
        st.stop()
    if not current_model:
        st.error("You didn't choose the model for the collocate selection")
        st.stop()

    # Map each selectbox option to its model file. Any unlisted option falls
    # back to the stihi_ru model, as the original if/elif/else chain did.
    model_paths = {
        'MODEL 1: nplus': 'models/nplus1_word2vec.bin',
        'MODEL 2: fontanka': 'models/fontanka_word2vec.bin',
        'MODEL 3: librusec': 'models/librusec_word2vec.bin',
        'MODEL 4: stihi_ru': 'models/stihi_ru_word2vec.bin',
    }
    model = load_w2v(model_paths.get(current_model, 'models/stihi_ru_word2vec.bin'))

    # Vocabulary keys have the form '<word>_<POS>'. Membership is tested on
    # the key -> index mapping (constant time) rather than by scanning the
    # index_to_key list.
    # NOTE(review): assumes load_w2v returns gensim (>= 4.0) KeyedVectors,
    # which expose both index_to_key and key_to_index - confirm.
    if f'{word}_{target_word_pos}' not in model.key_to_index:
        st.error("The word you entered is not present in the model")
        st.stop()

    # 'Restrict vocab' is optional: empty means no restriction. A non-empty
    # value that is not an integer is still ignored, but the user is told so
    # instead of it being dropped silently.
    vocab_limit = None
    vocab_limit_text = restrict_vocab.strip()
    if vocab_limit_text:
        try:
            vocab_limit = int(vocab_limit_text)
        except ValueError:
            st.warning("'Restrict vocab' must be a whole number; the value was ignored")

    output = get_collocates_for_word_type(model=model,
                                          word=word,
                                          target_pos=target_word_pos,
                                          topn=collocate_number,
                                          restrict_vocab=vocab_limit)

    st.write(output)

# Footer shown on every run: citation of the related article, written as raw
# HTML so the text is justified and the URL is a clickable link.
related_article_html = '''<div style="text-align: justify;"><b>Related article:</b> Petrushenko L., Mitrofanova O. Predicting Style-Dependent Collocations in Russian Text Corpora. // 
         The 17th Workshop on Recent Advances in Slavonic Natural Languages Processing, RASLAN 2023, Kouty nad Desnou, Czech Republic, December 8-10, 2023. – 
         pp. 79–89. – URL: <a href="http://nlp.fi.muni.cz/raslan/2023/paper13.pdf">http://nlp.fi.muni.cz/raslan/2023/paper13.pdf</a></div>'''
st.write(related_article_html, unsafe_allow_html=True)