Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -725,6 +725,13 @@ def get_reference_message(current_description, gender, generation, psychotype, b
|
|
725 |
import io
|
726 |
import numpy as np
|
727 |
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
728 |
repo = "fruitpicker01/Storage_dev"
|
729 |
file_path = "messages.csv"
|
730 |
url = f"https://api.github.com/repos/{repo}/contents/{file_path}"
|
@@ -741,6 +748,7 @@ def get_reference_message(current_description, gender, generation, psychotype, b
|
|
741 |
print(f"Error accessing the file: {response.status_code}")
|
742 |
return None
|
743 |
|
|
|
744 |
for col in ["Пол", "Поколение", "Психотип", "Стадия бизнеса", "Отрасль", "ОПФ"]:
|
745 |
df[col] = df[col].astype(str).str.strip().str.lower()
|
746 |
|
@@ -780,40 +788,36 @@ def get_reference_message(current_description, gender, generation, psychotype, b
|
|
780 |
|
781 |
filtered_df['Описание предложения'] = filtered_df['Описание предложения'].fillna('')
|
782 |
|
783 |
-
#
|
784 |
model = SentenceTransformer('sergeyzh/rubert-tiny-turbo')
|
785 |
descriptions = filtered_df['Описание предложения'].tolist()
|
786 |
-
descriptions.insert(0, current_description)
|
787 |
|
788 |
embeddings = model.encode(descriptions)
|
789 |
cosine_similarities = cosine_similarity([embeddings[0]], embeddings[1:]).flatten()
|
790 |
|
791 |
-
#
|
792 |
-
|
793 |
-
|
794 |
-
# Находим индексы с максимальным сходством
|
795 |
-
max_similarity_indices = np.where(cosine_similarities == max_similarity)[0]
|
796 |
|
797 |
-
|
798 |
-
similar_rows = filtered_df.iloc[max_similarity_indices]
|
799 |
|
800 |
-
#
|
|
|
801 |
if 'Timestamp' not in similar_rows.columns:
|
802 |
print("Столбец 'Timestamp' отсутствует в данных.")
|
803 |
-
similar_row = similar_rows.iloc[0]
|
804 |
else:
|
805 |
-
similar_rows = similar_rows.copy()
|
806 |
similar_rows['Timestamp'] = pd.to_numeric(similar_rows['Timestamp'], errors='coerce')
|
807 |
similar_rows = similar_rows.sort_values(by='Timestamp', ascending=False)
|
808 |
-
similar_row = similar_rows.iloc[0]
|
809 |
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
|
|
|
|
|
|
|
814 |
|
815 |
-
|
816 |
-
return reference_message
|
817 |
|
818 |
|
819 |
def adapt_messages_to_best_example(
|
@@ -827,7 +831,7 @@ def adapt_messages_to_best_example(
|
|
827 |
approach,
|
828 |
*selected_values
|
829 |
):
|
830 |
-
#
|
831 |
gender = selected_values[0]
|
832 |
generation = selected_values[1]
|
833 |
psychotype = selected_values[2]
|
@@ -835,35 +839,40 @@ def adapt_messages_to_best_example(
|
|
835 |
industry = selected_values[4]
|
836 |
legal_form = selected_values[5]
|
837 |
|
838 |
-
#
|
839 |
-
|
840 |
|
841 |
-
if not
|
842 |
-
#
|
843 |
-
adapted_message = "
|
844 |
yield adapted_message, adapted_message, adapted_message, adapted_message, adapted_message
|
845 |
else:
|
846 |
-
|
|
|
|
|
|
|
847 |
prompt_template = (
|
848 |
"Сообщение для адаптации:\n\"{personalized_message}\"\n\n"
|
849 |
-
"Пример (НЕ ИСПОЛЬЗУЙ ФАКТЫ ИЛИ ДАННЫЕ ИЗ
|
850 |
-
"
|
851 |
-
"
|
852 |
-
"
|
853 |
-
"
|
854 |
-
"
|
|
|
855 |
)
|
856 |
-
|
857 |
-
#
|
858 |
adapted_gigachat_pro = ""
|
859 |
adapted_gigachat_lite = ""
|
860 |
adapted_gigachat_plus = ""
|
861 |
adapted_gpt4o = ""
|
862 |
adapted_meta_llama_405b = ""
|
863 |
|
864 |
-
#
|
865 |
prompt = prompt_template.format(
|
866 |
-
|
|
|
867 |
key_message=key_message,
|
868 |
personalized_message=personalized_gigachat_pro
|
869 |
)
|
@@ -872,9 +881,10 @@ def adapt_messages_to_best_example(
|
|
872 |
adapted_gigachat_pro_display = f"{adapted_gigachat_pro}\n\n------\nКоличество знаков: {adapted_gigachat_pro_length}"
|
873 |
yield adapted_gigachat_pro_display, "", "", "", ""
|
874 |
|
875 |
-
#
|
876 |
prompt = prompt_template.format(
|
877 |
-
|
|
|
878 |
key_message=key_message,
|
879 |
personalized_message=personalized_gigachat_lite
|
880 |
)
|
@@ -883,9 +893,10 @@ def adapt_messages_to_best_example(
|
|
883 |
adapted_gigachat_lite_display = f"{adapted_gigachat_lite}\n\n------\nКоличество знаков: {adapted_gigachat_lite_length}"
|
884 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, "", "", ""
|
885 |
|
886 |
-
#
|
887 |
prompt = prompt_template.format(
|
888 |
-
|
|
|
889 |
key_message=key_message,
|
890 |
personalized_message=personalized_gigachat_plus
|
891 |
)
|
@@ -894,9 +905,10 @@ def adapt_messages_to_best_example(
|
|
894 |
adapted_gigachat_plus_display = f"{adapted_gigachat_plus}\n\n------\nКоличество знаков: {adapted_gigachat_plus_length}"
|
895 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, adapted_gigachat_plus_display, "", ""
|
896 |
|
897 |
-
#
|
898 |
prompt = prompt_template.format(
|
899 |
-
|
|
|
900 |
key_message=key_message,
|
901 |
personalized_message=personalized_gpt4o
|
902 |
)
|
@@ -905,9 +917,10 @@ def adapt_messages_to_best_example(
|
|
905 |
adapted_gpt4o_display = f"{adapted_gpt4o}\n\n------\nКоличество знаков: {adapted_gpt4o_length}"
|
906 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, adapted_gigachat_plus_display, adapted_gpt4o_display, ""
|
907 |
|
908 |
-
#
|
909 |
prompt = prompt_template.format(
|
910 |
-
|
|
|
911 |
key_message=key_message,
|
912 |
personalized_message=personalized_meta_llama_405b
|
913 |
)
|
@@ -926,23 +939,29 @@ def update_best_example_prompt(description, *selected_values):
|
|
926 |
industry = selected_values[4]
|
927 |
legal_form = selected_values[5]
|
928 |
|
929 |
-
# Retrieve
|
930 |
-
|
931 |
|
932 |
-
if not
|
933 |
-
# No reference
|
934 |
-
best_prompt = "
|
935 |
else:
|
|
|
|
|
|
|
|
|
936 |
best_prompt = (
|
937 |
-
"Пример (НЕ ИСПОЛЬЗУЙ ФАКТЫ ИЛИ ДАННЫЕ ИЗ
|
|
|
938 |
"1. Перепиши сообщение для адаптации, сохранив его смысл.\n"
|
939 |
-
"2. Используй стиль, построение предложений и лексику, максимально похожие на
|
940 |
-
"3. НЕ ДОБАВЛЯЙ факты, цифры, или любую информацию из
|
941 |
"4. Убедись, что итоговое сообщение содержит ТОЛЬКО информацию из сообщения для адаптации и адаптировано по стилю и структуре."
|
942 |
-
).format(
|
943 |
|
944 |
return best_prompt
|
945 |
|
|
|
946 |
def adapt_messages_and_perform_checks(
|
947 |
description_input,
|
948 |
personalized_gigachat_pro,
|
|
|
725 |
import io
|
726 |
import numpy as np
|
727 |
from sentence_transformers import SentenceTransformer
|
728 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
729 |
+
import base64
|
730 |
+
import pandas as pd
|
731 |
+
import requests
|
732 |
+
import re
|
733 |
+
|
734 |
+
# Fetching the data from the repository
|
735 |
repo = "fruitpicker01/Storage_dev"
|
736 |
file_path = "messages.csv"
|
737 |
url = f"https://api.github.com/repos/{repo}/contents/{file_path}"
|
|
|
748 |
print(f"Error accessing the file: {response.status_code}")
|
749 |
return None
|
750 |
|
751 |
+
# Cleaning up and filtering the DataFrame
|
752 |
for col in ["Пол", "Поколение", "Психотип", "Стадия бизнеса", "Отрасль", "ОПФ"]:
|
753 |
df[col] = df[col].astype(str).str.strip().str.lower()
|
754 |
|
|
|
788 |
|
789 |
filtered_df['Описание предложения'] = filtered_df['Описание предложения'].fillna('')
|
790 |
|
791 |
+
# Using the model to generate embeddings
|
792 |
model = SentenceTransformer('sergeyzh/rubert-tiny-turbo')
|
793 |
descriptions = filtered_df['Описание предложения'].tolist()
|
794 |
+
descriptions.insert(0, current_description)
|
795 |
|
796 |
embeddings = model.encode(descriptions)
|
797 |
cosine_similarities = cosine_similarity([embeddings[0]], embeddings[1:]).flatten()
|
798 |
|
799 |
+
# Retrieve indices of top two most similar descriptions
|
800 |
+
top_two_indices = np.argsort(cosine_similarities)[-2:][::-1]
|
|
|
|
|
|
|
801 |
|
802 |
+
similar_rows = filtered_df.iloc[top_two_indices]
|
|
|
803 |
|
804 |
+
# Processing the messages
|
805 |
+
reference_messages = []
|
806 |
if 'Timestamp' not in similar_rows.columns:
|
807 |
print("Столбец 'Timestamp' отсутствует в данных.")
|
|
|
808 |
else:
|
|
|
809 |
similar_rows['Timestamp'] = pd.to_numeric(similar_rows['Timestamp'], errors='coerce')
|
810 |
similar_rows = similar_rows.sort_values(by='Timestamp', ascending=False)
|
|
|
811 |
|
812 |
+
for _, similar_row in similar_rows.iterrows():
|
813 |
+
if pd.notnull(similar_row.get("Откорректированное сообщение", None)) and similar_row["Откорректированное сообщение"].strip():
|
814 |
+
reference_message = similar_row["Откорректированное сообщение"]
|
815 |
+
else:
|
816 |
+
reference_message = similar_row.get("Персонализированное сообщение", "")
|
817 |
+
reference_message = re.sub(r'\n-{6,}\nКоличество знаков: \d+', '', reference_message).strip()
|
818 |
+
reference_messages.append(reference_message)
|
819 |
|
820 |
+
return reference_messages[:2] # Return the top two messages
|
|
|
821 |
|
822 |
|
823 |
def adapt_messages_to_best_example(
|
|
|
831 |
approach,
|
832 |
*selected_values
|
833 |
):
|
834 |
+
# Extract personalization parameters
|
835 |
gender = selected_values[0]
|
836 |
generation = selected_values[1]
|
837 |
psychotype = selected_values[2]
|
|
|
839 |
industry = selected_values[4]
|
840 |
legal_form = selected_values[5]
|
841 |
|
842 |
+
# Get the two most similar reference messages
|
843 |
+
reference_messages = get_reference_message(description, gender, generation, psychotype, business_stage, industry, legal_form)
|
844 |
|
845 |
+
if not reference_messages or len(reference_messages) < 2:
|
846 |
+
# If reference messages are not found
|
847 |
+
adapted_message = "Эталонные сообщения не найдены для выбранных параметров персонализации."
|
848 |
yield adapted_message, adapted_message, adapted_message, adapted_message, adapted_message
|
849 |
else:
|
850 |
+
reference_message_1 = reference_messages[0]
|
851 |
+
reference_message_2 = reference_messages[1]
|
852 |
+
|
853 |
+
# Modify the prompt template to include two reference messages
|
854 |
prompt_template = (
|
855 |
"Сообщение для адаптации:\n\"{personalized_message}\"\n\n"
|
856 |
+
"Пример 1 (НЕ ИСПОЛЬЗУЙ ФАКТЫ ИЛИ ДАННЫЕ ИЗ ПРИМЕРА):\n\"{reference_message_1}\"\n\n"
|
857 |
+
"Пример 2 (НЕ ИСПОЛЬЗУЙ ФАКТЫ ИЛИ ДАННЫЕ ИЗ ПРИМЕРА):\n\"{reference_message_2}\"\n\n"
|
858 |
+
"1. Перепиши сообщение для адаптации, сохранив его смысл.\n"
|
859 |
+
"2. Используй стиль, построение предложений и лексику, максимально похожие на примеры.\n"
|
860 |
+
"3. НЕ ДОБАВЛЯЙ факты, цифры, или любую информацию из примеров.\n"
|
861 |
+
"4. Убедись, что итоговое сообщение содержит ТОЛЬКО информацию из сообщения для адаптации и адаптировано по стилю и структуре.\n"
|
862 |
+
"5. Проверь, что итоговое сообщение включает следующую информацию: \n\"{key_message}\""
|
863 |
)
|
864 |
+
|
865 |
+
# Initialize empty variables for the messages
|
866 |
adapted_gigachat_pro = ""
|
867 |
adapted_gigachat_lite = ""
|
868 |
adapted_gigachat_plus = ""
|
869 |
adapted_gpt4o = ""
|
870 |
adapted_meta_llama_405b = ""
|
871 |
|
872 |
+
# Adapt message for GigaChat-Pro
|
873 |
prompt = prompt_template.format(
|
874 |
+
reference_message_1=reference_message_1,
|
875 |
+
reference_message_2=reference_message_2,
|
876 |
key_message=key_message,
|
877 |
personalized_message=personalized_gigachat_pro
|
878 |
)
|
|
|
881 |
adapted_gigachat_pro_display = f"{adapted_gigachat_pro}\n\n------\nКоличество знаков: {adapted_gigachat_pro_length}"
|
882 |
yield adapted_gigachat_pro_display, "", "", "", ""
|
883 |
|
884 |
+
# Adapt message for GigaChat-Lite
|
885 |
prompt = prompt_template.format(
|
886 |
+
reference_message_1=reference_message_1,
|
887 |
+
reference_message_2=reference_message_2,
|
888 |
key_message=key_message,
|
889 |
personalized_message=personalized_gigachat_lite
|
890 |
)
|
|
|
893 |
adapted_gigachat_lite_display = f"{adapted_gigachat_lite}\n\n------\nКоличество знаков: {adapted_gigachat_lite_length}"
|
894 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, "", "", ""
|
895 |
|
896 |
+
# Adapt message for GigaChat-Plus
|
897 |
prompt = prompt_template.format(
|
898 |
+
reference_message_1=reference_message_1,
|
899 |
+
reference_message_2=reference_message_2,
|
900 |
key_message=key_message,
|
901 |
personalized_message=personalized_gigachat_plus
|
902 |
)
|
|
|
905 |
adapted_gigachat_plus_display = f"{adapted_gigachat_plus}\n\n------\nКоличество знаков: {adapted_gigachat_plus_length}"
|
906 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, adapted_gigachat_plus_display, "", ""
|
907 |
|
908 |
+
# Adapt message for GPT-4o
|
909 |
prompt = prompt_template.format(
|
910 |
+
reference_message_1=reference_message_1,
|
911 |
+
reference_message_2=reference_message_2,
|
912 |
key_message=key_message,
|
913 |
personalized_message=personalized_gpt4o
|
914 |
)
|
|
|
917 |
adapted_gpt4o_display = f"{adapted_gpt4o}\n\n------\nКоличество знаков: {adapted_gpt4o_length}"
|
918 |
yield adapted_gigachat_pro_display, adapted_gigachat_lite_display, adapted_gigachat_plus_display, adapted_gpt4o_display, ""
|
919 |
|
920 |
+
# Adapt message for Meta-Llama-3.1-405B
|
921 |
prompt = prompt_template.format(
|
922 |
+
reference_message_1=reference_message_1,
|
923 |
+
reference_message_2=reference_message_2,
|
924 |
key_message=key_message,
|
925 |
personalized_message=personalized_meta_llama_405b
|
926 |
)
|
|
|
939 |
industry = selected_values[4]
|
940 |
legal_form = selected_values[5]
|
941 |
|
942 |
+
# Retrieve two reference messages
|
943 |
+
reference_messages = get_reference_message(description, gender, generation, psychotype, business_stage, industry, legal_form)
|
944 |
|
945 |
+
if not reference_messages or len(reference_messages) < 2:
|
946 |
+
# No reference messages found
|
947 |
+
best_prompt = "Эталонные сообщения не найдены для выбранных параметров персонализации."
|
948 |
else:
|
949 |
+
reference_message_1 = reference_messages[0]
|
950 |
+
reference_message_2 = reference_messages[1]
|
951 |
+
|
952 |
+
# Update the prompt to include two reference messages
|
953 |
best_prompt = (
|
954 |
+
"Пример 1 (НЕ ИСПОЛЬЗУЙ ФАКТЫ ИЛИ ДАННЫЕ ИЗ ПРИМЕРА):\n\"{reference_message_1}\"\n\n"
|
955 |
+
"Пример 2 (НЕ ИСПОЛЬЗУЙ ФАКТЫ ИЛИ ДАННЫЕ ИЗ ПРИМЕРА):\n\"{reference_message_2}\"\n\n"
|
956 |
"1. Перепиши сообщение для адаптации, сохранив его смысл.\n"
|
957 |
+
"2. Используй стиль, построение предложений и лексику, максимально похожие на примеры.\n"
|
958 |
+
"3. НЕ ДОБАВЛЯЙ факты, цифры, или любую информацию из примеров.\n"
|
959 |
"4. Убедись, что итоговое сообщение содержит ТОЛЬКО информацию из сообщения для адаптации и адаптировано по стилю и структуре."
|
960 |
+
).format(reference_message_1=reference_message_1, reference_message_2=reference_message_2)
|
961 |
|
962 |
return best_prompt
|
963 |
|
964 |
+
|
965 |
def adapt_messages_and_perform_checks(
|
966 |
description_input,
|
967 |
personalized_gigachat_pro,
|