a-v-bely committed
Commit 1156b6f
Parent(s): 736e3e5

Update (summarization & pos combinations)
Browse files
- pages/4_📝_Онлайн-тест.py +1 -1
- requirements.txt +1 -0
- utilities/__pycache__/utils.cpython-310.pyc +0 -0
- utilities_cookies/__pycache__/cookie_manager.cpython-310.pyc +0 -0
- utilities_cookies/__pycache__/encrypted_cookie_manager.cpython-310.pyc +0 -0
- utilities_database/__pycache__/user_database_utils.cpython-310.pyc +0 -0
- utilities_database/__pycache__/user_database_widgets.cpython-310.pyc +0 -0
- utilities_language_bert/__pycache__/esp_main_workflow_bert.cpython-310.pyc +0 -0
- utilities_language_bert/__pycache__/esp_sentence_bert.cpython-310.pyc +0 -0
- utilities_language_bert/esp_main_workflow_bert.py +20 -1
- utilities_language_bert/esp_sentence_bert.py +10 -7
- utilities_language_general/__pycache__/esp_constants.cpython-310.pyc +0 -0
- utilities_language_general/__pycache__/esp_utils.cpython-310.pyc +0 -0
- utilities_language_general/__pycache__/morphology.cpython-310.pyc +0 -0
- utilities_language_general/esp_constants.py +33 -3
- utilities_language_general/esp_utils.py +13 -10
- utilities_language_w2v/__pycache__/esp_main_workflow_w2v.cpython-310.pyc +0 -0
- utilities_language_w2v/__pycache__/esp_sentence_w2v.cpython-310.pyc +0 -0
- utilities_language_w2v/esp_main_workflow_w2v.py +25 -2
- utilities_language_w2v/esp_sentence_w2v.py +10 -7
- utilities_option_menu/__pycache__/option_menu.cpython-310.pyc +0 -0
- utilities_ui/__pycache__/custom_download_button.cpython-310.pyc +0 -0
pages/4_📝_Онлайн-тест.py
CHANGED
@@ -43,7 +43,7 @@ if st.session_state.get('-ONLINE_TEST_READY-') and st.session_state.get('-LOGGED
                                   use_container_width=True)
     COMMENTS = ONLINE_TEST.text_area(label='**Прокомментировать**',
                                      placeholder='Напишите комментарий')
-    SUBMIT = ONLINE_TEST.form_submit_button('
+    SUBMIT = ONLINE_TEST.form_submit_button('ГОТОВО')
     if SUBMIT:
         points = test_mark = 'Teacher'
         appropriate_tasks = BAD_DISTRACTORS_AND_ANSWERS_temp["Задание уместно"].values.tolist()
requirements.txt
CHANGED
@@ -11,4 +11,5 @@ argon2-cffi>=21.3.0
 cryptography>=42.0.3
 transformers>=4.37.2
 streamlit-extras>=0.4.0
+bert-extractive-summarizer>=0.10.1
 es_core_news_lg @ https://github.com/explosion/spacy-models/releases/download/es_core_news_lg-3.7.0/es_core_news_lg-3.7.0-py3-none-any.whl
utilities/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/utilities/__pycache__/utils.cpython-310.pyc and b/utilities/__pycache__/utils.cpython-310.pyc differ

utilities_cookies/__pycache__/cookie_manager.cpython-310.pyc
CHANGED
Binary files a/utilities_cookies/__pycache__/cookie_manager.cpython-310.pyc and b/utilities_cookies/__pycache__/cookie_manager.cpython-310.pyc differ

utilities_cookies/__pycache__/encrypted_cookie_manager.cpython-310.pyc
CHANGED
Binary files a/utilities_cookies/__pycache__/encrypted_cookie_manager.cpython-310.pyc and b/utilities_cookies/__pycache__/encrypted_cookie_manager.cpython-310.pyc differ

utilities_database/__pycache__/user_database_utils.cpython-310.pyc
CHANGED
Binary files a/utilities_database/__pycache__/user_database_utils.cpython-310.pyc and b/utilities_database/__pycache__/user_database_utils.cpython-310.pyc differ

utilities_database/__pycache__/user_database_widgets.cpython-310.pyc
CHANGED
Binary files a/utilities_database/__pycache__/user_database_widgets.cpython-310.pyc and b/utilities_database/__pycache__/user_database_widgets.cpython-310.pyc differ

utilities_language_bert/__pycache__/esp_main_workflow_bert.cpython-310.pyc
CHANGED
Binary files a/utilities_language_bert/__pycache__/esp_main_workflow_bert.cpython-310.pyc and b/utilities_language_bert/__pycache__/esp_main_workflow_bert.cpython-310.pyc differ

utilities_language_bert/__pycache__/esp_sentence_bert.cpython-310.pyc
CHANGED
Binary files a/utilities_language_bert/__pycache__/esp_sentence_bert.cpython-310.pyc and b/utilities_language_bert/__pycache__/esp_sentence_bert.cpython-310.pyc differ
utilities_language_bert/esp_main_workflow_bert.py
CHANGED
@@ -11,11 +11,13 @@ from utilities_language_general.esp_utils import prepare_tasks
 from utilities_language_general.esp_constants import load_bert
 from streamlit.runtime.uploaded_file_manager import UploadedFile
 import utilities_language_general.esp_constants as esp_constants
+from utilities_language_general.esp_constants import summarization
 from utilities_language_general.esp_utils import prepare_target_words
 from utilities_language_general.esp_utils import compute_frequency_dict
 from utilities_language_general.esp_constants import BAD_USER_TARGET_WORDS
 
 
+
 def main_workflow(
         file: UploadedFile or None,
         text: str,
@@ -137,6 +139,22 @@ def main_workflow(
     logs.update(label="Запускаем процесс генерации заданий!", state='running')
     progress.progress(20)
 
+    # Define summary length
+    text_length = len(current_text_sentences)
+    if text_length <= 15:
+        summary_length = text_length
+    elif text_length <= 25:
+        summary_length = 15
+    else:
+        n = (text_length - 20) // 5
+        summary_length = 15 + 2 * n
+    round_summary_length = summary_length - (summary_length % -10)
+
+    # Get summary. May choose between round_summary_length and summary_length
+    SUMMARY = summarization(current_text, num_sentences=round_summary_length)
+    logs.success('Нашли интересные предложения. Пригодятся!')
+    progress.progress(25)
+
     for sentence in workflow:
         sentence.lemmatize_sentence()
 
@@ -149,7 +167,8 @@ def main_workflow(
         sentence.search_target_words(target_words_automatic_mode=tw_mode_automatic_mode,
                                      target_minimum=target_minimum,
                                      user_target_words=USER_TARGET_WORDS,
-                                     frequency_dict=FREQ_DICT)
+                                     frequency_dict=FREQ_DICT,
+                                     summary=SUMMARY)
         progress.progress(int(30 + (j * (20 / len(workflow)))))
     progress.progress(50)
     DUPLICATE_TARGET_WORDS = defaultdict(list)
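The changes above wire a summary step into task generation: the summary length grows with text size and is then rounded up to the nearest multiple of ten via `summary_length - (summary_length % -10)`. A standalone sketch of that heuristic (the function name is ours, for illustration only):

    def summary_length_for(text_length: int) -> int:
        # Mirrors the committed heuristic over len(current_text_sentences)
        if text_length <= 15:
            summary_length = text_length
        elif text_length <= 25:
            summary_length = 15
        else:
            n = (text_length - 20) // 5
            summary_length = 15 + 2 * n
        # In Python, x % -10 is <= 0, so subtracting it rounds up to a multiple of 10
        return summary_length - (summary_length % -10)

    assert summary_length_for(10) == 10   # short text: keep every sentence
    assert summary_length_for(23) == 20   # capped at 15, rounded up to 20
    assert summary_length_for(40) == 30   # 15 + 2*4 = 23, rounded up to 30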
utilities_language_bert/esp_sentence_bert.py
CHANGED
@@ -48,7 +48,7 @@ class SENTENCE:
             self.sentence_phrases.append(self.sentence_lemma_pos[i][1])
             previous_was_phrase = False
 
-    def search_target_words_automatically(self, target_minimum: set, frequency_dict: dict = None):
+    def search_target_words_automatically(self, target_minimum: set, frequency_dict: dict = None, summary: list = None):
         for token in self.sentence_phrases:
             if isinstance(token, list):  # if token is a phrase
                 original_token1 = token[1]['original_token1']
@@ -79,7 +79,8 @@ class SENTENCE:
                     'tags': tags,
                     'position_in_sentence': self.original.find(original_token1.text),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': 0
+                    'frequency_in_text': 0,
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
             else:  # if token is just a spacy.nlp token
@@ -98,10 +99,11 @@ class SENTENCE:
                     'position_in_sentence': self.original.find(token.text),
                     'not_named_entity': True if token.ent_type == 0 else False,
                     'frequency_in_text': frequency_dict.get(token.lemma_, 1),
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
 
-    def search_user_target_words(self, user_target_words: set = None, frequency_dict: dict = None):
+    def search_user_target_words(self, user_target_words: set = None, frequency_dict: dict = None, summary: list = None):
         for _utw in user_target_words:
             if _utw in self.original:
                 parse_utw = nlp(_utw)
@@ -137,19 +139,20 @@ class SENTENCE:
                     'tags': user_target_word_tags,
                     'position_in_sentence': self.original.find(_utw),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1)
+                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1),
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
 
     def search_target_words(self, target_words_automatic_mode: bool, target_minimum,
                             user_target_words: set = None,
-                            frequency_dict: dict = None):
+                            frequency_dict: dict = None, summary: list = None):
         if target_words_automatic_mode:
             self.search_target_words_automatically(target_minimum=target_minimum,
-                                                   frequency_dict=frequency_dict)
+                                                   frequency_dict=frequency_dict, summary=summary)
         else:
             self.search_user_target_words(user_target_words=user_target_words,
-                                          frequency_dict=frequency_dict)
+                                          frequency_dict=frequency_dict, summary=summary)
 
     def filter_target_words(self, target_words_automatic_mode):
         c_position = 0
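Every target word now carries an `in_summary` flag computed as `self.original in summary`. Since bert-extractive-summarizer returns its summary as a single string, despite the `summary: list = None` annotation this is most likely a substring test over the sentence text; a sketch with invented values:

    # Hypothetical values; summary stands for the string returned by Summarizer
    summary = 'El gato duerme en el sofá. Afuera llueve.'
    sentence_original = 'Afuera llueve.'
    in_summary = sentence_original in summary  # True: plain substring membership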
utilities_language_general/__pycache__/esp_constants.cpython-310.pyc
CHANGED
Binary files a/utilities_language_general/__pycache__/esp_constants.cpython-310.pyc and b/utilities_language_general/__pycache__/esp_constants.cpython-310.pyc differ

utilities_language_general/__pycache__/esp_utils.cpython-310.pyc
CHANGED
Binary files a/utilities_language_general/__pycache__/esp_utils.cpython-310.pyc and b/utilities_language_general/__pycache__/esp_utils.cpython-310.pyc differ

utilities_language_general/__pycache__/morphology.cpython-310.pyc
CHANGED
Binary files a/utilities_language_general/__pycache__/morphology.cpython-310.pyc and b/utilities_language_general/__pycache__/morphology.cpython-310.pyc differ
utilities_language_general/esp_constants.py
CHANGED
@@ -3,6 +3,7 @@ import spacy
 import gensim
 import streamlit as st
 from transformers import pipeline
+from summarizer import Summarizer
 
 
 @st.cache_resource
@@ -25,8 +26,12 @@ def load_bert():
     _pipeline = pipeline(task="fill-mask", model="a-v-white/bert-base-spanish-wwm-cased-finetuned-literature-pro")
     return _pipeline
 
+@st.cache_resource
+def load_summarizer():
+    return Summarizer()
 
 nlp = load_spacy()
+summarization = load_summarizer()
 w2v_model_1_path = r'model1.gz'
 w2v_model_2_path = r'model2.gz'
 
@@ -57,7 +62,32 @@ with open('language_data/phrases.json', 'r', encoding='utf-8') as f:
 with open('language_data/fix_irregular_lemma.json', 'r', encoding='utf-8') as f:
     FIX_LEMMA = json.load(f)
 
-SIMILARITY_VALUES = {'A1': 1.0, 'A2': 1.0, 'B1': 1.0, 'B2': 1.0, 'C1': 1.0, 'C2': 1.0, 'Без уровня': 1.0}
-SIMILARITY_VALUES_bert = {'A1': 1.0, 'A2': 1.0, 'B1': 1.0, 'B2': 1.0, 'C1': 1.0, 'C2': 1.0, 'Без уровня': 1.0}
-
 BAD_USER_TARGET_WORDS = []
+
+
+COMBINE_POS = {
+    'simple':
+        {
+            'A1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+            'A2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+            'B1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+            'B2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'], 'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'],
+                   'AUX_ADJ': ['PRON_VERB'], 'PRON_VERB': ['AUX_ADJ'], 'ADP': ['SCONJ', 'ADV'], },
+            'C1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'], 'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'],
+                   'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+            'C2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'], 'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'],
+                   'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+        },
+    'phrase':
+        {
+            'A1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+            'A2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+            'B1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX'], 'AUX': ['VERB']},
+            'B2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'], 'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'],
+                   'AUX_ADJ': ['PRON_VERB'], 'PRON_VERB': ['AUX_ADJ'], 'ADP': ['SCONJ', 'ADV'], },
+            'C1': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'], 'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'],
+                   'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+            'C2': {'PRON': ['DET'], 'DET': ['PRON'], 'VERB': ['AUX', 'AUX_VERB'], 'AUX': ['VERB', 'AUX_VERB'], 'AUX_VERB': ['VERB', 'AUX'], 'AUX_AUX': ['AUX'],
+                   'ADJ': ['NOUN'], 'NOUN': ['ADJ']},
+        },
+}
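`load_summarizer()` caches a bert-extractive-summarizer model behind `st.cache_resource`, and the module-level `summarization` object is then called directly in both workflows. A minimal usage sketch, assuming the library's defaults (sample text invented):

    from summarizer import Summarizer

    summarization = Summarizer()                    # downloads/loads a BERT model once
    text = 'Primera frase. Segunda frase. Tercera frase.'
    summary = summarization(text, num_sentences=2)  # returns the summary as one string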
utilities_language_general/esp_utils.py
CHANGED
@@ -2,8 +2,7 @@ from nltk import edit_distance
 from utilities.utils import answer_letter
 from utilities_language_general.esp_constants import nlp
 from utilities_language_general.esp_constants import FIX_LEMMA
-from utilities_language_general.esp_constants import SIMILARITY_VALUES
-from utilities_language_general.esp_constants import SIMILARITY_VALUES_bert
+from utilities_language_general.esp_constants import COMBINE_POS
 
 
 def prepare_target_words(target_words):
@@ -111,6 +110,7 @@ def check_token_bert(token, current_minimum: set = None, check_allowed_pos: set
 def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None, global_distractors: set,
                                distractor_minimum: set, level_name: str, max_num_distractors: int,
                                max_length_ratio=5, min_edit_distance_ratio=0.5):
+
     distractors = []
     query = lemma if '_' in lemma else f'{lemma}_{pos}'
     lemma = '_'.join(lemma.split('_')[::2])
@@ -124,15 +124,16 @@ def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None,
     query_vector = model.get_mean_vector(query_parts)
     candidates = model.similar_by_vector(query_vector, topn=max_num_distractors + 100)
     for candidate in candidates:
-        if candidate[0].count('_') == 1:
+        if candidate[0].count('_') == 1 and pos != 'phrase':
             distractor_lemma, distractor_pos = candidate[0].split('_')
             distractor_similarity = candidate[1]
             candidate_gender = get_tags(distractor_lemma).get('Gender')
             length_ratio = abs(len(lemma) - len(distractor_lemma))
-            condition = ((distractor_pos == pos
-                          or (
+            condition = ((distractor_pos == pos
+                          or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(distractor_pos) is not None
+                              and distractor_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][distractor_pos])
+                          )
                          and distractor_lemma != lemma
-                         and distractor_similarity < SIMILARITY_VALUES[level_name]
                          and candidate_gender == gender
                          and length_ratio <= max_length_ratio
                          and distractor_lemma not in global_distractors
@@ -150,16 +151,18 @@ def get_distractors_from_model(model, lemma: str, pos: str, gender: str or None,
         if candidate[0].count('_') > 3 or pos in ('NOUN', 'ADJ', 'NUM'):
             continue
         d1_lemma, d1_pos, d2_lemma, d2_pos = candidate[0].split('_')
+        d_pos = f'{d1_pos}_{d2_pos}'
         distractor_lemma = f'{d1_lemma}_{d2_lemma}'
         distractor_similarity = candidate[1]
         condition = (((d1_pos == pos or d2_pos == pos)
+                      or (COMBINE_POS['phrase'][level_name].get(d_pos) is not None and COMBINE_POS['phrase'][level_name].get(pos) is not None
+                          and d_pos in COMBINE_POS['phrase'][level_name].get(d_pos) and pos in COMBINE_POS['phrase'][level_name].get(pos))
                       or (d1_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
                           and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP'))
                       or (d2_pos in ('VERB', 'AUX', 'SCONJ', 'ADP')
                           and pos in ('phrase', 'VERB', 'AUX', 'SCONJ', 'ADP')))
                      and candidate[0] != lemma
                      and distractor_lemma != lemma
-                     and distractor_similarity < SIMILARITY_VALUES[level_name]
                      and distractor_lemma not in global_distractors)
         if condition:
             if distractor_minimum is not None:
@@ -202,11 +205,11 @@ def get_distractors_from_model_bert(model, text_with_masked_task: str, lemma: st
             distractor_similarity = candidate_distractor[1]
             candidate_gender = get_tags(distractor_lemma).get('Gender')
             length_ratio = abs(len(lemma) - len(distractor_lemma))
-            if ((
-                or (pos
+            if ((distractor_pos == pos
+                 or (COMBINE_POS['simple'][level_name].get(pos) is not None and COMBINE_POS['simple'][level_name].get(distractor_pos) is not None
+                     and distractor_pos in COMBINE_POS['simple'][level_name][pos] and pos in COMBINE_POS['simple'][level_name][distractor_pos]))
                 and distractor_lemma != lemma
                 and (len(_distractors) < max_num_distractors+100)
-               and (distractor_similarity < SIMILARITY_VALUES_bert[level_name])
                 and (candidate_gender == gender)
                 and (length_ratio <= max_length_ratio)  # May be changed if case of phrases
                 and (distractor_lemma not in global_distractors)
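The new conditions accept a distractor whose POS either matches the target's exactly or is mutually allowed at the learner's level in `COMBINE_POS`. The symmetric lookup, isolated as a predicate (the helper name is ours):

    def pos_combinable(table: dict, level: str, pos: str, other_pos: str) -> bool:
        # Both tags must have an entry for this level, and each must list the other
        allowed = table['simple'][level]
        return (allowed.get(pos) is not None and allowed.get(other_pos) is not None
                and other_pos in allowed[pos] and pos in allowed[other_pos])

    # e.g. pos_combinable(COMBINE_POS, 'C1', 'ADJ', 'NOUN') -> True
    #      pos_combinable(COMBINE_POS, 'A1', 'ADJ', 'NOUN') -> False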
utilities_language_w2v/__pycache__/esp_main_workflow_w2v.cpython-310.pyc
CHANGED
Binary files a/utilities_language_w2v/__pycache__/esp_main_workflow_w2v.cpython-310.pyc and b/utilities_language_w2v/__pycache__/esp_main_workflow_w2v.cpython-310.pyc differ

utilities_language_w2v/__pycache__/esp_sentence_w2v.cpython-310.pyc
CHANGED
Binary files a/utilities_language_w2v/__pycache__/esp_sentence_w2v.cpython-310.pyc and b/utilities_language_w2v/__pycache__/esp_sentence_w2v.cpython-310.pyc differ
utilities_language_w2v/esp_main_workflow_w2v.py
CHANGED
@@ -11,6 +11,7 @@ from utilities_language_general.esp_constants import load_w2v
 from utilities_language_general.esp_utils import prepare_tasks
 from streamlit.runtime.uploaded_file_manager import UploadedFile
 import utilities_language_general.esp_constants as esp_constants
+from utilities_language_general.esp_constants import summarization
 from utilities_language_general.esp_constants import w2v_model_1_path
 from utilities_language_general.esp_constants import w2v_model_2_path
 from utilities_language_general.esp_utils import prepare_target_words
@@ -146,6 +147,22 @@ def main_workflow(
     logs.update(label="Запускаем процесс генерации заданий!", state='running')
     progress.progress(20)
 
+    # Define summary length
+    text_length = len(current_text_sentences)
+    if text_length <= 15:
+        summary_length = text_length
+    elif text_length <= 25:
+        summary_length = 15
+    else:
+        n = (text_length - 20) // 5
+        summary_length = 15 + 2 * n
+    round_summary_length = summary_length - (summary_length % -10)
+
+    # Get summary. May choose between round_summary_length and summary_length
+    SUMMARY = summarization(current_text, num_sentences=round_summary_length)
+    logs.success('Нашли интересные предложения. Пригодятся!')
+    progress.progress(25)
+
     for sentence in workflow:
         sentence.lemmatize_sentence()
 
@@ -159,7 +176,8 @@ def main_workflow(
                                      target_words_automatic_mode=tw_mode_automatic_mode,
                                      target_minimum=target_minimum,
                                      user_target_words=USER_TARGET_WORDS,
-                                     frequency_dict=FREQ_DICT)
+                                     frequency_dict=FREQ_DICT,
+                                     summary=SUMMARY)
         progress.progress(int(30 + (j * (30 / len(workflow)))))
     progress.progress(60)
     DUPLICATE_TARGET_WORDS = defaultdict(list)
@@ -217,7 +235,12 @@ def main_workflow(
         NUMBER_TASKS = 10
     else:
         NUMBER_TASKS = len(RESULT_TASKS)
-
+    RESULT_TASKS_in_summary = filter(lambda task: task.in_summary, RESULT_TASKS)
+    RESULT_TASTS_not_in_summary = filter(lambda task: not task.in_summary, RESULT_TASKS)
+    if len(RESULT_TASKS_in_summary) >= NUMBER_TASKS:
+        RESULT_TASKS = RESULT_TASKS_in_summary
+    else:
+        RESULT_TASKS = RESULT_TASKS_in_summary + sample(RESULT_TASTS_not_in_summary, NUMBER_TASKS - len(RESULT_TASKS_in_summary))
     RESULT_TASKS = sorted(RESULT_TASKS, key=lambda t: (t.sentence_number, t.position_in_sentence))
 
     for task in RESULT_TASKS:
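One caveat in the new task-selection step: `filter()` returns a lazy iterator, so `len(...)` and `+` on `RESULT_TASKS_in_summary` would raise `TypeError` at runtime (note also the `RESULT_TASTS_not_in_summary` spelling). A working equivalent, assuming `sample` is `random.sample`, materializes both groups first:

    from random import sample

    in_summary = [task for task in RESULT_TASKS if task.in_summary]
    not_in_summary = [task for task in RESULT_TASKS if not task.in_summary]
    if len(in_summary) >= NUMBER_TASKS:
        RESULT_TASKS = in_summary
    else:
        # Top up with randomly sampled tasks that fell outside the summary
        RESULT_TASKS = in_summary + sample(not_in_summary, NUMBER_TASKS - len(in_summary))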
utilities_language_w2v/esp_sentence_w2v.py
CHANGED
@@ -47,7 +47,7 @@ class SENTENCE:
             self.sentence_phrases.append(self.sentence_lemma_pos[i][1])
             previous_was_phrase = False
 
-    def search_target_words_automatically(self, model, target_minimum: set, frequency_dict: dict = None):
+    def search_target_words_automatically(self, model, target_minimum: set, frequency_dict: dict = None, summary: list = None):
         for token in self.sentence_phrases:
             if isinstance(token, list):  # if token is a phrase
                 original_token1 = token[1]['original_token1']
@@ -76,7 +76,8 @@ class SENTENCE:
                     'tags': tags,
                     'position_in_sentence': self.original.find(original_token1.text),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': 0
+                    'frequency_in_text': 0,
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
             else:  # if token is just a spacy.nlp token
@@ -94,10 +95,11 @@ class SENTENCE:
                     'position_in_sentence': self.original.find(token.text),
                     'not_named_entity': True if token.ent_type == 0 else False,
                     'frequency_in_text': frequency_dict.get(token.lemma_, 1),
+                    'in_summary': self.original in summary
                 }
                 self.target_words.append(target_word)
 
-    def search_user_target_words(self, model, user_target_words: set = None, frequency_dict: dict = None):
+    def search_user_target_words(self, model, user_target_words: set = None, frequency_dict: dict = None, summary: list = None):
         for _utw in user_target_words:
             if _utw in self.original:
                 parse_utw = nlp(_utw)
@@ -132,7 +134,8 @@ class SENTENCE:
                     'tags': user_target_word_tags,
                     'position_in_sentence': self.original.find(_utw),
                     'not_named_entity': not_ner,
-                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1)
+                    'frequency_in_text': frequency_dict.get(user_target_word_lemma, 1),
+                    'in_summary': self.original in summary
                 }
                 if not (model.has_index_for(user_target_word_lemma)
                         or model.has_index_for(f'{user_target_word_lemma}_{user_target_word_pos}')):
@@ -140,14 +143,14 @@ class SENTENCE:
                 else:
                     self.target_words.append(target_word)
 
-    def search_target_words(self, model, target_words_automatic_mode: bool, target_minimum,
+    def search_target_words(self, model, target_words_automatic_mode: bool, target_minimum, summary: list = None,
                             user_target_words: set = None, frequency_dict: dict = None):
         if target_words_automatic_mode:
             self.search_target_words_automatically(model=model, target_minimum=target_minimum,
-                                                   frequency_dict=frequency_dict)
+                                                   frequency_dict=frequency_dict, summary=summary)
         else:
             self.search_user_target_words(model=model, user_target_words=user_target_words,
-                                          frequency_dict=frequency_dict)
+                                          frequency_dict=frequency_dict, summary=summary)
 
     def attach_distractors_to_target_word(self, model, global_distractors, distractor_minimum, level_name,
                                           max_frequency, logs, progress):
utilities_option_menu/__pycache__/option_menu.cpython-310.pyc
CHANGED
Binary files a/utilities_option_menu/__pycache__/option_menu.cpython-310.pyc and b/utilities_option_menu/__pycache__/option_menu.cpython-310.pyc differ

utilities_ui/__pycache__/custom_download_button.cpython-310.pyc
CHANGED
Binary files a/utilities_ui/__pycache__/custom_download_button.cpython-310.pyc and b/utilities_ui/__pycache__/custom_download_button.cpython-310.pyc differ