a-v-bely committed on
Commit
28dab52
·
1 Parent(s): 703d114
utilities_language_general/rus_utils.py CHANGED
@@ -281,7 +281,7 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
281
  and decision
282
  and distractor_lemma != lemma
283
  and len(distractors) < 100
284
- and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
285
  and length_ratio <= max_length_ratio
286
  and distractor_lemma not in global_distractors
287
  and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
@@ -305,8 +305,8 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
305
  level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
306
  substitute_lemma=candidate[0], substitute_pos=d_pos)
307
  condition = (((d1_pos == pos or d2_pos == pos)
308
- or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(distractor_pos) is not None
309
- and distractor_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][distractor_pos])
310
  or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
311
  and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
312
  or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
@@ -359,7 +359,6 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
359
  distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
360
  distractor_similarity = candidate_distractor[1]
361
  candidate_gender = define_gender(distractor_lemma)
362
- # print(distractor_lemma, candidate_gender, distractor_pos, pos)
363
  length_ratio = abs(len(lemma) - len(distractor_lemma))
364
  decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
365
  target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
@@ -370,7 +369,7 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
370
  and decision
371
  and distractor_lemma != lemma
372
  and (len(_distractors) < max_num_distractors + 10)
373
- and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
374
  and (length_ratio <= max_length_ratio) # May be changed if case of phrases
375
  and (distractor_lemma not in global_distractors)
376
  and (edit_distance(lemma, distractor_lemma) # May be changed if case of phrases
 
281
  and decision
282
  and distractor_lemma != lemma
283
  and len(distractors) < 100
284
+ and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
285
  and length_ratio <= max_length_ratio
286
  and distractor_lemma not in global_distractors
287
  and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
 
305
  level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
306
  substitute_lemma=candidate[0], substitute_pos=d_pos)
307
  condition = (((d1_pos == pos or d2_pos == pos)
308
+ or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(d_pos) is not None
309
+ and d_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][d_pos])
310
  or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
311
  and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
312
  or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
 
359
  distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
360
  distractor_similarity = candidate_distractor[1]
361
  candidate_gender = define_gender(distractor_lemma)
 
362
  length_ratio = abs(len(lemma) - len(distractor_lemma))
363
  decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
364
  target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
 
369
  and decision
370
  and distractor_lemma != lemma
371
  and (len(_distractors) < max_num_distractors + 10)
372
+ and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
373
  and (length_ratio <= max_length_ratio) # May be changed if case of phrases
374
  and (distractor_lemma not in global_distractors)
375
  and (edit_distance(lemma, distractor_lemma) # May be changed if case of phrases
utilities_language_w2v/rus_sentence_w2v.py CHANGED
@@ -86,6 +86,7 @@ class SENTENCE:
86
  for _utw in user_target_words:
87
  if _utw in self.original:
88
  parse_utw = nlp(_utw)
 
89
  if ' ' in _utw:
90
  tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
91
  user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
@@ -103,7 +104,7 @@ class SENTENCE:
103
  'original_text': _utw,
104
  'lemma': user_target_word_lemma,
105
  'pos': user_target_word_pos,
106
- 'gender': convert_gender(user_target_word_tags.get('Gender')),
107
  'tags': user_target_word_tags,
108
  'position_in_sentence': self.original.find(_utw),
109
  'not_named_entity': not_ner,
 
86
  for _utw in user_target_words:
87
  if _utw in self.original:
88
  parse_utw = nlp(_utw)
89
+ gender = convert_gender(parse_utw[0].morph.to_dict().get('Gender'))
90
  if ' ' in _utw:
91
  tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
92
  user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
 
104
  'original_text': _utw,
105
  'lemma': user_target_word_lemma,
106
  'pos': user_target_word_pos,
107
+ 'gender': gender if gender else 'masc',
108
  'tags': user_target_word_tags,
109
  'position_in_sentence': self.original.find(_utw),
110
  'not_named_entity': not_ner,