Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import os
|
2 |
import re
|
|
|
|
|
3 |
from pypdf import PdfReader
|
4 |
import gradio as gr
|
5 |
from langchain_groq import ChatGroq
|
@@ -8,184 +10,220 @@ from langchain.vectorstores import Chroma
|
|
8 |
from langchain_core.documents import Document
|
9 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
10 |
|
11 |
-
embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
|
12 |
-
vector_store = Chroma(embedding_function=embeddings)
|
13 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
14 |
-
|
15 |
models = ["deepseek-r1-distill-llama-70b", "llama-3.3-70b-versatile", "gemma2-9b-it"]
|
16 |
default_model = models[0]
|
17 |
-
model = ChatGroq(api_key="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", model_name=default_model)
|
18 |
-
|
19 |
-
chat_history = []
|
20 |
-
PRICE_PER_TOKEN = 0.00001
|
21 |
-
|
22 |
-
def summarize_chat(model):
|
23 |
-
|
24 |
-
chat_text = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history])
|
25 |
-
summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
|
26 |
-
summary_response = model.invoke(summary_prompt)
|
27 |
-
return summary_response.content
|
28 |
-
|
29 |
-
def process_file(file_path):
|
30 |
-
"""Process file and store in ChromaDB."""
|
31 |
-
if not file_path:
|
32 |
-
return None
|
33 |
-
file_extension = os.path.splitext(file_path)[1].lower()
|
34 |
-
try:
|
35 |
-
if file_extension == ".pdf":
|
36 |
-
|
37 |
-
reader = PdfReader(file_path)
|
38 |
-
file_text = "\n".join(page.extract_text() for page in reader.pages)
|
39 |
-
elif file_extension == ".txt":
|
40 |
-
with open(file_path, "r", encoding="utf-8") as f:
|
41 |
-
file_text = f.read()
|
42 |
-
else:
|
43 |
-
raise ValueError(f"Unsupported file format: {file_extension}")
|
44 |
-
|
45 |
-
file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
|
46 |
-
file_splits = text_splitter.split_documents(file_docs)
|
47 |
-
vector_store.add_documents(file_splits)
|
48 |
-
return file_text
|
49 |
-
except Exception as e:
|
50 |
-
raise RuntimeError(f"Error processing file: {str(e)}")
|
51 |
-
|
52 |
-
|
53 |
-
def answer_query(query, file_path, summarize, tone, model_name, creativity, keywords, language, response_length, welcome_message, exclusion_words):
|
54 |
-
global chat_history
|
55 |
-
|
56 |
-
model = ChatGroq(api_key="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", model_name=model_name)
|
57 |
-
try:
|
58 |
-
|
59 |
-
if file_path:
|
60 |
-
process_file(file_path)
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
search_query = f"{keywords} {query}" if keywords else query
|
63 |
-
retrieved_docs = vector_store.similarity_search(search_query, k=3)
|
64 |
knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
|
65 |
-
|
66 |
tone_prompts = {
|
67 |
"رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
|
68 |
"محاورهای": "پاسخ را به صورت دوستانه ارائه کن.",
|
69 |
"علمی": "پاسخ را با استدلالهای منطقی ارائه کن.",
|
70 |
"طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
|
71 |
}
|
72 |
-
tone_instruction = tone_prompts.get(tone,
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
if response_length == "کوتاه":
|
77 |
length_instruction = "پاسخ را به صورت مختصر ارائه کن."
|
78 |
elif response_length == "بلند":
|
79 |
length_instruction = "پاسخ را به صورت مفصل و جامع ارائه کن."
|
80 |
else:
|
81 |
length_instruction = ""
|
82 |
-
|
83 |
exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""
|
84 |
-
|
85 |
prompt = (
|
86 |
-
f"شما
|
87 |
f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
|
88 |
)
|
89 |
-
|
90 |
-
if welcome_message and not chat_history:
|
91 |
prompt = f"{welcome_message}\n\n" + prompt
|
92 |
-
|
93 |
-
|
94 |
-
conversation_history = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history])
|
95 |
prompt = f"{conversation_history}\n\n" + prompt
|
96 |
-
|
97 |
prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
|
98 |
-
|
99 |
-
|
100 |
-
cleaned_response =
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
def
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
],
|
188 |
-
outputs=[chatbot, summary_output, token_count, token_price]
|
189 |
-
)
|
190 |
-
|
191 |
-
demo.launch()
|
|
|
1 |
import os
|
2 |
import re
|
3 |
+
import sqlite3
|
4 |
+
from datetime import datetime
|
5 |
from pypdf import PdfReader
|
6 |
import gradio as gr
|
7 |
from langchain_groq import ChatGroq
|
|
|
10 |
from langchain_core.documents import Document
|
11 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
12 |
|
|
|
|
|
|
|
|
|
13 |
# Groq model identifiers offered in the UI dropdown; the first entry is the default.
models = ["deepseek-r1-distill-llama-70b", "llama-3.3-70b-versatile", "gemma2-9b-it"]
default_model = models[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
class DatabaseManager:
    """Persists chat summaries in a local SQLite database."""

    def __init__(self, db_name="chat_history.db"):
        # NOTE(review): check_same_thread=False because Gradio appears to invoke
        # callbacks from worker threads other than the one that opened the
        # connection; the default setting would raise ProgrammingError there.
        self.conn = sqlite3.connect(db_name, check_same_thread=False)
        self._create_tables()

    def _create_tables(self):
        """Create the chat_summaries table if it does not exist yet."""
        cursor = self.conn.cursor()
        cursor.execute(
            '''CREATE TABLE IF NOT EXISTS chat_summaries
                   (id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME,
                    summary TEXT,
                    model_used TEXT,
                    token_count INT)'''
        )
        self.conn.commit()

    def save_summary(self, summary_data):
        """Insert one summary record.

        summary_data: dict with keys 'summary', 'model', 'tokens'.
        Returns True on success, False on any database error (best-effort:
        persistence failures must not crash the chat flow).
        """
        try:
            cursor = self.conn.cursor()
            cursor.execute(
                '''INSERT INTO chat_summaries
                       (timestamp, summary, model_used, token_count)
                   VALUES (?, ?, ?, ?)''',
                (datetime.now(),
                 summary_data['summary'],
                 summary_data['model'],
                 summary_data['tokens'])
            )
            self.conn.commit()
            return True
        except Exception as e:
            # Deliberate swallow: log and report failure instead of raising.
            print(f"Database error: {str(e)}")
            return False

    def load_summaries(self, limit=5):
        """Return the `limit` most recent summaries, newest first, one per line."""
        cursor = self.conn.cursor()
        cursor.execute(
            "SELECT summary FROM chat_summaries ORDER BY id DESC LIMIT ?",
            (limit,)
        )
        return "\n".join(row[0] for row in cursor.fetchall())

    def close(self):
        """Close the underlying SQLite connection (new, backward-compatible)."""
        self.conn.close()
|
56 |
+
|
57 |
+
class AICore:
    """Backend engine: embeddings, vector store, Groq LLM calls, and chat state."""

    def __init__(self):
        self.embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
        self.vector_store = Chroma(embedding_function=self.embeddings)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.chat_history = []           # list of (question, answer) tuples
        self.price_per_token = 0.00001   # flat per-token price estimate
        # SECURITY: read the key from the environment instead of hard-coding it.
        # NOTE(review): the fallback key below was committed to source control and
        # must be rotated; it remains only for backward compatibility until then.
        self.api_key = os.environ.get(
            "GROQ_API_KEY",
            "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",
        )
        self.model = ChatGroq(api_key=self.api_key, model_name=default_model)
        self.db = DatabaseManager()

    def _init_model(self, model_name):
        """Swap the ChatGroq client only when a different model is requested."""
        if self.model.model_name != model_name:
            self.model = ChatGroq(api_key=self.api_key, model_name=model_name)

    def summarize_chat(self):
        """Ask the current model for a short Persian summary of the chat so far."""
        chat_text = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in self.chat_history])
        summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
        summary_response = self.model.invoke(summary_prompt)
        return summary_response.content

    def process_file(self, file_obj):
        """Extract text from an uploaded PDF/TXT file and index it in the vector store.

        Returns the raw text, or None when no file was given.
        Raises RuntimeError (chained to the cause) on extraction/indexing failure.
        """
        if not file_obj:
            return None
        # Gradio may hand us a tempfile-like object or a plain path string.
        file_path = file_obj.name if hasattr(file_obj, "name") else file_obj
        file_extension = os.path.splitext(file_path)[1].lower()
        try:
            if file_extension == ".pdf":
                reader = PdfReader(file_path)
                file_text = "\n".join(page.extract_text() for page in reader.pages)
            elif file_extension == ".txt":
                with open(file_path, "r", encoding="utf-8") as f:
                    file_text = f.read()
            else:
                raise ValueError(f"Unsupported file format: {file_extension}")
            file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
            file_splits = self.text_splitter.split_documents(file_docs)
            self.vector_store.add_documents(file_splits)
            return file_text
        except Exception as e:
            raise RuntimeError(f"Error processing file: {str(e)}") from e

    def count_tokens(self, text):
        """Rough token count: whitespace-separated words (approximation only)."""
        return len(text.split())

    def calculate_price(self, input_text, output_text):
        """Return (total_tokens, formatted price string) for a prompt/response pair."""
        input_tokens = self.count_tokens(input_text)
        output_tokens = self.count_tokens(output_text)
        total_tokens = input_tokens + output_tokens
        total_price = total_tokens * self.price_per_token
        return total_tokens, f"{total_price:.6f} دلار"

    def remove_think_sections(self, response_text):
        """Strip <think>...</think> reasoning blocks emitted by some models."""
        return re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL)

    def filter_to_persian(self, text):
        """Drop characters outside Arabic script, whitespace, digits, and basic punctuation."""
        return re.sub(r'[^\u0600-\u06FF\s\.,؛؟!٪،0-9]', '', text)

    def _build_prompt(self, query, knowledge, tone, language, response_length,
                      welcome_message, exclusion_words):
        """Assemble the full LLM prompt: persona, style instructions, history, context."""
        tone_prompts = {
            "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
            "محاورهای": "پاسخ را به صورت دوستانه ارائه کن.",
            "علمی": "پاسخ را با استدلالهای منطقی ارائه کن.",
            "طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
        }
        tone_instruction = tone_prompts.get(tone, f"پاسخ را به زبان {language} ارائه کن.")
        language_instruction = (f"پاسخ را فقط به زبان {language} ارائه کن و از زبان دیگری استفاده نکن مگر آنکه بخواهی کد بنویسی "
                                f"که در آن صورت فقط از زبان انگلیسی استفاده کن مگر اینکه کاربر از تو درخواست کند از زبان دیگری استفاده بکنی و از زبان چینی استفاده نکن.") if language else ""
        if response_length == "کوتاه":
            length_instruction = "پاسخ را به صورت مختصر ارائه کن."
        elif response_length == "بلند":
            length_instruction = "پاسخ را به صورت مفصل و جامع ارائه کن."
        else:
            length_instruction = ""
        exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""
        prompt = (
            f"شما Parviz Mind هستید، یک دستیار هوش مصنوعی ساخته شده توسط امیرمهدی پرویز دانشجو دانشگاه صنعتی کرمانشاه "
            f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
        )
        # Welcome message only applies to the very first turn.
        if welcome_message and not self.chat_history:
            prompt = f"{welcome_message}\n\n" + prompt
        if self.chat_history:
            conversation_history = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in self.chat_history])
            prompt = f"{conversation_history}\n\n" + prompt
        prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
        return prompt

    def answer_query(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Run one chat turn: index any file, retrieve context, query the LLM.

        Returns (cleaned_response, summary_text, total_tokens, price_string).
        """
        self._init_model(model_name)
        if file_obj:
            self.process_file(file_obj)
        search_query = f"{keywords} {query}" if keywords else query
        retrieved_docs = self.vector_store.similarity_search(search_query, k=3)
        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
        prompt = self._build_prompt(query, knowledge, tone, language, response_length,
                                    welcome_message, exclusion_words)
        response = self.model.invoke(prompt, temperature=creativity)
        cleaned_response = self.remove_think_sections(response.content)
        cleaned_response = self.filter_to_persian(cleaned_response)
        self.chat_history.append((query, cleaned_response))
        total_tokens, price = self.calculate_price(prompt, cleaned_response)
        summary_text = self.summarize_chat() if summarize else "خلاصهسازی غیرفعال است."
        if summarize and summary_text != "خلاصهسازی غیرفعال است.":
            self.db.save_summary({
                'summary': summary_text,
                'model': model_name,
                'tokens': total_tokens
            })
        return cleaned_response, summary_text, total_tokens, price

    def clear_history(self):
        """Forget the conversation; returns the (now empty) history list."""
        self.chat_history = []
        return self.chat_history
|
157 |
+
|
158 |
+
class ChatInterface:
    """Gradio front-end for Parviz Mind: builds the UI and wires events to AICore."""

    def __init__(self, ai_core: AICore):
        self.ai = ai_core
        self._create_interface()

    def _create_interface(self):
        """Build the Gradio Blocks layout and register the event handlers."""
        with gr.Blocks() as self.interface:
            gr.Markdown("## 🤖 Parviz Mind")
            gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")
            self.chatbot = gr.Chatbot(label="💬 تاریخچه چت")
            self.query_input = gr.Textbox(label="❓ سوال خود را وارد کنید")
            self.summarize_checkbox = gr.Checkbox(label="📌 خلاصهساز را فعال کن")
            self.submit_button = gr.Button("🚀 ارسال")
            self.del_button = gr.Button("🗑 پاک کردن حافظه")
            self.file_input = gr.File(label="📂 آپلود فایل", file_types=[".pdf", ".txt"])
            with gr.Accordion("خلاصه چت", open=False):
                with gr.Row():
                    self.summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)
            with gr.Accordion("تنظیمات پیشرفته", open=False):
                with gr.Row():
                    self.model_dropdown = gr.Dropdown(label="🔍 انتخاب مدل", choices=models, value=default_model)
                    self.tone_dropdown = gr.Dropdown(label="🎭 لحن پاسخ", choices=["رسمی", "محاورهای", "علمی", "طنزآمیز"], value="رسمی")
                    self.language_dropdown = gr.Dropdown(label="🌐 زبان چت بات", choices=["فارسی", "انگلیسی", "عربی"], value="فارسی")
                    self.token_count = gr.Textbox(label="🔢 تعداد توکنها", interactive=False)
                    self.token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
                with gr.Row():
                    self.creativity_slider = gr.Slider(label="🎨 خلاقیت (Temperature)", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
                    self.response_length_dropdown = gr.Dropdown(label="📏 طول پاسخ", choices=["کوتاه", "بلند"], value="بلند")
                self.keywords_input = gr.Textbox(label="🔑 کلمات کلیدی (اختیاری)")
                self.welcome_message_input = gr.Textbox(label="👋 پیام خوش آمدگویی (اختیاری)")
                self.exclusion_words_input = gr.Textbox(label="🚫 کلمات استثنا (اختیاری)")
            # Shared wiring lists: the same inputs/outputs were previously
            # duplicated verbatim across all three event registrations.
            chat_inputs = [
                self.query_input, self.file_input, self.summarize_checkbox,
                self.tone_dropdown, self.model_dropdown, self.creativity_slider,
                self.keywords_input, self.language_dropdown, self.response_length_dropdown,
                self.welcome_message_input, self.exclusion_words_input
            ]
            chat_outputs = [self.chatbot, self.summary_output, self.token_count, self.token_price]
            self.del_button.click(
                self.clear_chat,
                inputs=[],
                outputs=chat_outputs
            )
            self.submit_button.click(
                self.process_chat,
                inputs=chat_inputs,
                outputs=chat_outputs
            )
            # Pressing Enter in the textbox behaves like the submit button.
            self.query_input.submit(
                self.process_chat,
                inputs=chat_inputs,
                outputs=chat_outputs
            )

    def process_chat(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Delegate one turn to AICore and shape the result for the Gradio outputs."""
        response, summary, total_tokens, price = self.ai.answer_query(
            query, file_obj, summarize, tone, model_name, creativity,
            keywords, language, response_length, welcome_message, exclusion_words
        )
        return self.ai.chat_history, summary, total_tokens, price

    def clear_chat(self):
        """Reset the conversation and blank out the summary/token displays."""
        self.ai.clear_history()
        return self.ai.chat_history, "", 0, "0 دلار"

    def launch(self):
        """Start the Gradio server."""
        self.interface.launch()
|
225 |
+
|
226 |
+
if __name__ == "__main__":
    # Build the AI backend, attach the Gradio UI, and start serving.
    app = ChatInterface(AICore())
    app.launch()
|
|
|
|
|
|
|
|
|
|