Spaces:

a-v-bely
/

russian-task-generator

Running

App Files Files Community

a-v-bely committed on May 17, 2024

Commit

28dab52

1 Parent(s): 703d114

Fix bugs

Browse files

Files changed (2) hide show

utilities_language_general/rus_utils.py +4 -5
utilities_language_w2v/rus_sentence_w2v.py +2 -1

utilities_language_general/rus_utils.py CHANGED Viewed

@@ -281,7 +281,7 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
                          and decision
                          and distractor_lemma != lemma
                          and len(distractors) < 100
-                         and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
                          and length_ratio <= max_length_ratio
                          and distractor_lemma not in global_distractors
                          and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
@@ -305,8 +305,8 @@ def get_distractors_from_model(doc, model, scaler, classifier, pos_dict:dict, ta
                                      level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
                                      substitute_lemma=candidate[0], substitute_pos=d_pos)
             condition = (((d1_pos == pos or d2_pos == pos)
-                          or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(distractor_pos) is not None
-                              and distractor_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][distractor_pos])
                           or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
                               and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
                           or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
@@ -359,7 +359,6 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
             distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
         distractor_similarity = candidate_distractor[1]
         candidate_gender = define_gender(distractor_lemma)
-        # print(distractor_lemma, candidate_gender, distractor_pos, pos)
         length_ratio = abs(len(lemma) - len(distractor_lemma))
         decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
                                  target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
@@ -370,7 +369,7 @@ def get_distractors_from_model_bert(model, scaler, classifier, pos_dict:dict, le
                           and decision
                           and distractor_lemma != lemma
                           and (len(_distractors) < max_num_distractors + 10)
-                          and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2'))
                           and (length_ratio <= max_length_ratio)  # May be changed if case of phrases
                           and (distractor_lemma not in global_distractors)
                           and (edit_distance(lemma, distractor_lemma)  # May be changed if case of phrases

                          and decision
                          and distractor_lemma != lemma
                          and len(distractors) < 100
+                         and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
                          and length_ratio <= max_length_ratio
                          and distractor_lemma not in global_distractors
                          and edit_distance(lemma, distractor_lemma) / ((len(lemma) + len(distractor_lemma)) / 2) >
                                      level=level_name, target_lemma=query, target_text=target_text, target_pos=pos, target_position=lemma_index,
                                      substitute_lemma=candidate[0], substitute_pos=d_pos)
             condition = (((d1_pos == pos or d2_pos == pos)
+                          or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(d_pos) is not None
+                              and d_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][d_pos])
                           or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
                               and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
                           or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
             distractor_lemma, distractor_pos = candidate_morph.lemma_, candidate_morph.pos_
         distractor_similarity = candidate_distractor[1]
         candidate_gender = define_gender(distractor_lemma)
         length_ratio = abs(len(lemma) - len(distractor_lemma))
         decision = make_decision(doc=None, model_type='bert', scaler=scaler, classifier=classifier, pos_dict=pos_dict, level=level_name,
                                  target_lemma=lemma, target_text=None, target_pos=pos, target_position=None,
                           and decision
                           and distractor_lemma != lemma
                           and (len(_distractors) < max_num_distractors + 10)
+                          and (candidate_gender == gender and level_name in ('B1', 'B2', 'C1', 'C2') or level_name in ('A1', 'A2'))
                           and (length_ratio <= max_length_ratio)  # May be changed if case of phrases
                           and (distractor_lemma not in global_distractors)
                           and (edit_distance(lemma, distractor_lemma)  # May be changed if case of phrases

utilities_language_w2v/rus_sentence_w2v.py CHANGED Viewed

@@ -86,6 +86,7 @@ class SENTENCE:
         for _utw in user_target_words:
             if _utw in self.original:
                 parse_utw = nlp(_utw)
                 if ' ' in _utw:
                     tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
                     user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
@@ -103,7 +104,7 @@ class SENTENCE:
                     'original_text': _utw,
                     'lemma': user_target_word_lemma,
                     'pos': user_target_word_pos,
-                    'gender': convert_gender(user_target_word_tags.get('Gender')),
                     'tags': user_target_word_tags,
                     'position_in_sentence': self.original.find(_utw),
                     'not_named_entity': not_ner,

         for _utw in user_target_words:
             if _utw in self.original:
                 parse_utw = nlp(_utw)
+                gender = convert_gender(parse_utw[0].morph.to_dict().get('Gender'))
                 if ' ' in _utw:
                     tags = get_tags(parse_utw[0].text)[0] | get_tags(parse_utw[1].text)[0]
                     user_target_word_lemma = '_'.join([f'{token.lemma_}_{token.pos_}' for token in parse_utw])
                     'original_text': _utw,
                     'lemma': user_target_word_lemma,
                     'pos': user_target_word_pos,
+                    'gender': gender if gender else 'masc',
                     'tags': user_target_word_tags,
                     'position_in_sentence': self.original.find(_utw),
                     'not_named_entity': not_ner,