Spaces:
Running
Running
togokah
committed on
Commit
·
3a8bdd9
1
Parent(s):
a27ec1d
Revert "Update rus_constants.py"
Browse files
This reverts commit a27ec1d6de1bd8ab42f1cf04108a256bda66c432.
utilities_language_general/rus_constants.py
CHANGED
@@ -5,25 +5,21 @@ import pymorphy2
|
|
5 |
import streamlit as st
|
6 |
from transformers import pipeline
|
7 |
|
8 |
-
|
9 |
@st.cache_resource
|
10 |
def load_morph():
|
11 |
_morph = pymorphy2.MorphAnalyzer(lang='ru')
|
12 |
return _morph
|
13 |
|
14 |
-
|
15 |
@st.cache_resource
|
16 |
def load_w2v(model_path):
|
17 |
_w2v_model = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True)
|
18 |
return _w2v_model
|
19 |
|
20 |
-
|
21 |
-
@st.cache_resource
|
22 |
def load_spacy():
|
23 |
_nlp = spacy.load('ru_core_news_lg')
|
24 |
return _nlp
|
25 |
|
26 |
-
|
27 |
@st.cache_resource
|
28 |
def load_bert():
|
29 |
return pipeline("fill-mask", model="a-v-white/ruBert-base-finetuned-russian-moshkov-child-corpus-pro")
|
@@ -63,8 +59,7 @@ c2_distractor_set = c2_target_set.union(c1_target_set)
|
|
63 |
with open('language_data/phrases.json', 'r', encoding='utf-8') as f:
|
64 |
PHRASES = set(json.load(f)['PHRASES'])
|
65 |
|
66 |
-
SIMILARITY_VALUES_w2v =
|
67 |
-
|
68 |
-
|
69 |
-
SIMILARITY_VALUES_bert = {SIMILARITY_VALUES_bert[i]: SIMILARITY_VALUES_bert[i + 1] for i in range(6)}
|
70 |
BAD_USER_TARGET_WORDS = []
|
|
|
5 |
import streamlit as st
|
6 |
from transformers import pipeline
|
7 |
|
|
|
8 |
@st.cache_resource
|
9 |
def load_morph():
|
10 |
_morph = pymorphy2.MorphAnalyzer(lang='ru')
|
11 |
return _morph
|
12 |
|
|
|
13 |
@st.cache_resource
|
14 |
def load_w2v(model_path):
|
15 |
_w2v_model = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True)
|
16 |
return _w2v_model
|
17 |
|
18 |
+
@st.cache_resource
|
|
|
19 |
def load_spacy():
|
20 |
_nlp = spacy.load('ru_core_news_lg')
|
21 |
return _nlp
|
22 |
|
|
|
23 |
@st.cache_resource
|
24 |
def load_bert():
|
25 |
return pipeline("fill-mask", model="a-v-white/ruBert-base-finetuned-russian-moshkov-child-corpus-pro")
|
|
|
59 |
with open('language_data/phrases.json', 'r', encoding='utf-8') as f:
|
60 |
PHRASES = set(json.load(f)['PHRASES'])
|
61 |
|
62 |
+
SIMILARITY_VALUES_w2v = {'A1': 1.0, 'A2': 1.0, 'B1': 1.0, 'B2': 1.0, 'C1': 1.0, 'C2': 1.0, 'Без уровня': 1.0}
|
63 |
+
SIMILARITY_VALUES_bert = {'A1': 1.0, 'A2': 1.0, 'B1': 1.0, 'B2': 1.0, 'C1': 1.0, 'C2': 1.0, 'Без уровня': 1.0}
|
64 |
+
|
|
|
65 |
BAD_USER_TARGET_WORDS = []
|