Spaces:
Running
Running
a-v-bely
committed on
Commit
·
28dab52
1
Parent(s):
703d114
Fix bugs
Browse files
utilities_language_general/rus_utils.py
CHANGED
@@ -281,7 +281,7 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
|
|
281 |
and decision
|
282 |
and distractor_lemma != lemma
|
283 |
and len(distractors) < 100
|
284 |
-
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
|
285 |
and length_ratio <= max_length_ratio
|
286 |
and distractor_lemma not in global_distractors
|
287 |
and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
|
@@ -305,8 +305,8 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
|
|
305 |
level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
|
306 |
substitute_lemma=candidate[0], substitute_pos=d_pos)
|
307 |
condition = (((d1_pos == pos or d2_pos == pos)
|
308 |
-
or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(
|
309 |
-
and
|
310 |
or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
311 |
and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
|
312 |
or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
@@ -359,7 +359,6 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
|
|
359 |
distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
|
360 |
distractor_similarity = candidate_distractor[1]
|
361 |
candidate_gender = define_gender(distractor_lemma)
|
362 |
-
# print(distractor_lemma, candidate_gender, distractor_pos, pos)
|
363 |
length_ratio = abs(len(lemma) - len(distractor_lemma))
|
364 |
decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
|
365 |
target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
|
@@ -370,7 +369,7 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
|
|
370 |
and decision
|
371 |
and distractor_lemma != lemma
|
372 |
and (len(_distractors) < max_num_distractors + 10)
|
373 |
-
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
|
374 |
and (length_ratio <= max_length_ratio) # May be changed if case of phrases
|
375 |
and (distractor_lemma not in global_distractors)
|
376 |
and (edit_distance(lemma, distractor_lemma) # May be changed if case of phrases
|
|
|
281 |
and decision
|
282 |
and distractor_lemma != lemma
|
283 |
and len(distractors) < 100
|
284 |
+
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
|
285 |
and length_ratio <= max_length_ratio
|
286 |
and distractor_lemma not in global_distractors
|
287 |
and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
|
|
|
305 |
level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
|
306 |
substitute_lemma=candidate[0], substitute_pos=d_pos)
|
307 |
condition = (((d1_pos == pos or d2_pos == pos)
|
308 |
+
or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(d_pos) is not None
|
309 |
+
and d_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][d_pos])
|
310 |
or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
311 |
and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
|
312 |
or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
|
|
|
359 |
distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
|
360 |
distractor_similarity = candidate_distractor[1]
|
361 |
candidate_gender = define_gender(distractor_lemma)
|
|
|
362 |
length_ratio = abs(len(lemma) - len(distractor_lemma))
|
363 |
decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
|
364 |
target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
|
|
|
369 |
and decision
|
370 |
and distractor_lemma != lemma
|
371 |
and (len(_distractors) < max_num_distractors + 10)
|
372 |
+
and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
|
373 |
and (length_ratio <= max_length_ratio) # May be changed if case of phrases
|
374 |
and (distractor_lemma not in global_distractors)
|
375 |
and (edit_distance(lemma, distractor_lemma) # May be changed if case of phrases
|
utilities_language_w2v/rus_sentence_w2v.py
CHANGED
@@ -86,6 +86,7 @@ class SENTENCE:
|
|
86 |
for _utw in user_target_words:
|
87 |
if _utw in self.original:
|
88 |
parse_utw = nlp(_utw)
|
|
|
89 |
if ' ' in _utw:
|
90 |
tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
|
91 |
user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
|
@@ -103,7 +104,7 @@ class SENTENCE:
|
|
103 |
'original_text': _utw,
|
104 |
'lemma': user_target_word_lemma,
|
105 |
'pos': user_target_word_pos,
|
106 |
-
'gender':
|
107 |
'tags': user_target_word_tags,
|
108 |
'position_in_sentence': self.original.find(_utw),
|
109 |
'not_named_entity': not_ner,
|
|
|
86 |
for _utw in user_target_words:
|
87 |
if _utw in self.original:
|
88 |
parse_utw = nlp(_utw)
|
89 |
+
gender = convert_gender(parse_utw[0].morph.to_dict().get('Gender'))
|
90 |
if ' ' in _utw:
|
91 |
tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
|
92 |
user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
|
|
|
104 |
'original_text': _utw,
|
105 |
'lemma': user_target_word_lemma,
|
106 |
'pos': user_target_word_pos,
|
107 |
+
'gender': gender if gender else 'masc',
|
108 |
'tags': user_target_word_tags,
|
109 |
'position_in_sentence': self.original.find(_utw),
|
110 |
'not_named_entity': not_ner,
|