Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,8 @@ import string
|
|
17 |
import io
|
18 |
from datetime import datetime, timedelta
|
19 |
import plotly.express as px
|
|
|
|
|
20 |
|
21 |
|
22 |
morph = pymorphy2.MorphAnalyzer()
|
@@ -693,20 +695,16 @@ def load_previous_user_request_from_github():
|
|
693 |
return "", "", "", "", "", "", "", "", "", "", None, None, None, None, None, None
|
694 |
|
695 |
|
696 |
-
def get_reference_message(gender, generation, psychotype, business_stage, industry, legal_form):
|
697 |
import io
|
698 |
-
|
699 |
repo = "fruitpicker01/Storage_dev"
|
700 |
file_path = "messages.csv"
|
701 |
-
|
702 |
url = f"https://api.github.com/repos/{repo}/contents/{file_path}"
|
703 |
headers = {
|
704 |
"Authorization": f"token {token}",
|
705 |
"Content-Type": "application/json"
|
706 |
}
|
707 |
-
|
708 |
response = requests.get(url, headers=headers)
|
709 |
-
|
710 |
if response.status_code == 200:
|
711 |
content = response.json()
|
712 |
file_content = base64.b64decode(content['content'])
|
@@ -715,11 +713,9 @@ def get_reference_message(gender, generation, psychotype, business_stage, indust
|
|
715 |
print(f"Error accessing the file: {response.status_code}")
|
716 |
return None
|
717 |
|
718 |
-
# Нормализуем данные для корректного сравнения
|
719 |
for col in ["Пол", "Поколение", "Психотип", "Стадия бизнеса", "Отрасль", "ОПФ"]:
|
720 |
df[col] = df[col].astype(str).str.strip().str.lower()
|
721 |
|
722 |
-
# Нормализуем входные параметры
|
723 |
params = {
|
724 |
"Пол": str(gender).strip().lower() if gender else None,
|
725 |
"Поколение": str(generation).strip().lower() if generation else None,
|
@@ -729,10 +725,8 @@ def get_reference_message(gender, generation, psychotype, business_stage, indust
|
|
729 |
"ОПФ": str(legal_form).strip().lower() if legal_form else None
|
730 |
}
|
731 |
|
732 |
-
# Фильтруем строки, где поле "Комментарий" непустое
|
733 |
df = df[df["Комментарий"].isna() | (df["Комментарий"].str.strip() == '')]
|
734 |
-
|
735 |
-
# Формируем условия фильтрации
|
736 |
filter_conditions = []
|
737 |
for col, value in params.items():
|
738 |
if value and value.lower() != 'none':
|
@@ -742,7 +736,6 @@ def get_reference_message(gender, generation, psychotype, business_stage, indust
|
|
742 |
print("Не заданы параметры персонализации.")
|
743 |
return None
|
744 |
|
745 |
-
# Применяем фильтрацию
|
746 |
filter_condition = filter_conditions[0]
|
747 |
for condition in filter_conditions[1:]:
|
748 |
filter_condition &= condition
|
@@ -753,25 +746,37 @@ def get_reference_message(gender, generation, psychotype, business_stage, indust
|
|
753 |
print("Сообщения с заданными параметрами не найдены.")
|
754 |
return None
|
755 |
|
756 |
-
#
|
757 |
-
|
|
|
|
|
758 |
|
759 |
-
#
|
760 |
-
|
761 |
|
762 |
-
#
|
763 |
-
|
764 |
-
|
765 |
-
else:
|
766 |
-
reference_message = latest_row.get("Персонализированное сообщение", "")
|
767 |
|
768 |
-
#
|
769 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
770 |
|
|
|
|
|
|
|
|
|
|
|
771 |
return reference_message
|
772 |
|
773 |
|
774 |
def adapt_messages_to_best_example(
|
|
|
775 |
personalized_gigachat_pro,
|
776 |
personalized_gigachat_lite,
|
777 |
personalized_gigachat_plus,
|
@@ -790,7 +795,7 @@ def adapt_messages_to_best_example(
|
|
790 |
legal_form = selected_values[5]
|
791 |
|
792 |
# Получение эталонного сообщения (без изменений)
|
793 |
-
reference_message = get_reference_message(gender, generation, psychotype, business_stage, industry, legal_form)
|
794 |
|
795 |
if not reference_message:
|
796 |
# Если эталонное сообщение не найдено
|
@@ -871,7 +876,7 @@ def adapt_messages_to_best_example(
|
|
871 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, adapted_gigachat_plus_display, adapted_gpt4o_display, adapted_meta_llama_405b_display
|
872 |
|
873 |
|
874 |
-
def update_best_example_prompt(*selected_values):
|
875 |
# Extract personalization parameters
|
876 |
gender = selected_values[0]
|
877 |
generation = selected_values[1]
|
@@ -881,7 +886,7 @@ def update_best_example_prompt(*selected_values):
|
|
881 |
legal_form = selected_values[5]
|
882 |
|
883 |
# Retrieve the reference message
|
884 |
-
reference_message = get_reference_message(gender, generation, psychotype, business_stage, industry, legal_form)
|
885 |
|
886 |
if not reference_message:
|
887 |
# No reference message found
|
|
|
17 |
import io
|
18 |
from datetime import datetime, timedelta
|
19 |
import plotly.express as px
|
20 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
21 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
22 |
|
23 |
|
24 |
morph = pymorphy2.MorphAnalyzer()
|
|
|
695 |
return "", "", "", "", "", "", "", "", "", "", None, None, None, None, None, None
|
696 |
|
697 |
|
698 |
+
def get_reference_message(current_description, gender, generation, psychotype, business_stage, industry, legal_form):
|
699 |
import io
|
|
|
700 |
repo = "fruitpicker01/Storage_dev"
|
701 |
file_path = "messages.csv"
|
|
|
702 |
url = f"https://api.github.com/repos/{repo}/contents/{file_path}"
|
703 |
headers = {
|
704 |
"Authorization": f"token {token}",
|
705 |
"Content-Type": "application/json"
|
706 |
}
|
|
|
707 |
response = requests.get(url, headers=headers)
|
|
|
708 |
if response.status_code == 200:
|
709 |
content = response.json()
|
710 |
file_content = base64.b64decode(content['content'])
|
|
|
713 |
print(f"Error accessing the file: {response.status_code}")
|
714 |
return None
|
715 |
|
|
|
716 |
for col in ["Пол", "Поколение", "Психотип", "Стадия бизнеса", "Отрасль", "ОПФ"]:
|
717 |
df[col] = df[col].astype(str).str.strip().str.lower()
|
718 |
|
|
|
719 |
params = {
|
720 |
"Пол": str(gender).strip().lower() if gender else None,
|
721 |
"Поколение": str(generation).strip().lower() if generation else None,
|
|
|
725 |
"ОПФ": str(legal_form).strip().lower() if legal_form else None
|
726 |
}
|
727 |
|
|
|
728 |
df = df[df["Комментарий"].isna() | (df["Комментарий"].str.strip() == '')]
|
729 |
+
|
|
|
730 |
filter_conditions = []
|
731 |
for col, value in params.items():
|
732 |
if value and value.lower() != 'none':
|
|
|
736 |
print("Не заданы параметры персонализации.")
|
737 |
return None
|
738 |
|
|
|
739 |
filter_condition = filter_conditions[0]
|
740 |
for condition in filter_conditions[1:]:
|
741 |
filter_condition &= condition
|
|
|
746 |
print("Сообщения с заданными параметрами не найдены.")
|
747 |
return None
|
748 |
|
749 |
+
# Проверяем, что колонка 'Описание предложения' существует
|
750 |
+
if 'Описание предложения' not in filtered_df.columns:
|
751 |
+
print("Описание предложения отсутствует в данных.")
|
752 |
+
return None
|
753 |
|
754 |
+
# Заменяем NaN на пустые строки в 'Описание предложения'
|
755 |
+
filtered_df['Описание предложения'] = filtered_df['Описание предложения'].fillna('')
|
756 |
|
757 |
+
# Собираем описания для вычисления сходства
|
758 |
+
descriptions = filtered_df['Описание предложения'].tolist()
|
759 |
+
descriptions.insert(0, current_description) # Добавляем текущее описание в начало списка
|
|
|
|
|
760 |
|
761 |
+
# Вычисляем TF-IDF векторы и косинусное сходство
|
762 |
+
vectorizer = TfidfVectorizer()
|
763 |
+
tfidf_matrix = vectorizer.fit_transform(descriptions)
|
764 |
+
cosine_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
|
765 |
+
|
766 |
+
# Находим индекс сообщения с наибольшим сходством
|
767 |
+
most_similar_idx = cosine_similarities.argmax()
|
768 |
+
similar_row = filtered_df.iloc[most_similar_idx]
|
769 |
|
770 |
+
if pd.notnull(similar_row.get("Откорректированное сообщение", None)) and similar_row["Откорректированное сообщение"].strip():
|
771 |
+
reference_message = similar_row["Откорректированное сообщение"]
|
772 |
+
else:
|
773 |
+
reference_message = similar_row.get("Персонализированное сообщение", "")
|
774 |
+
reference_message = re.sub(r'\n-{6,}\nКоличество знаков: \d+', '', reference_message).strip()
|
775 |
return reference_message
|
776 |
|
777 |
|
778 |
def adapt_messages_to_best_example(
|
779 |
+
description,
|
780 |
personalized_gigachat_pro,
|
781 |
personalized_gigachat_lite,
|
782 |
personalized_gigachat_plus,
|
|
|
795 |
legal_form = selected_values[5]
|
796 |
|
797 |
# Получение эталонного сообщения (без изменений)
|
798 |
+
reference_message = get_reference_message(description, gender, generation, psychotype, business_stage, industry, legal_form)
|
799 |
|
800 |
if not reference_message:
|
801 |
# Если эталонное сообщение не найдено
|
|
|
876 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, adapted_gigachat_plus_display, adapted_gpt4o_display, adapted_meta_llama_405b_display
|
877 |
|
878 |
|
879 |
+
def update_best_example_prompt(description, *selected_values):
|
880 |
# Extract personalization parameters
|
881 |
gender = selected_values[0]
|
882 |
generation = selected_values[1]
|
|
|
886 |
legal_form = selected_values[5]
|
887 |
|
888 |
# Retrieve the reference message
|
889 |
+
reference_message = get_reference_message(description, gender, generation, psychotype, business_stage, industry, legal_form)
|
890 |
|
891 |
if not reference_message:
|
892 |
# No reference message found
|