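# NOTE(review): the three lines below appear to be page-header residue from
# where this file was copied; they are kept as comments so the module parses.
# Spaces:
# Running
# Running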
import os
import re
import sqlite3
import threading
from datetime import datetime

import gradio as gr
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader
# Chat model identifiers offered in the UI's model dropdown and passed to
# ChatGroq as ``model_name``; the first entry is the one selected at start-up.
models = [
    "deepseek-r1-distill-llama-70b",
    "llama-3.3-70b-versatile",
    "gemma2-9b-it",
]
default_model = models[0]
class DatabaseManager:
    """Store and retrieve chat summaries in a local SQLite database.

    One connection is shared by every caller.  It is opened with
    ``check_same_thread=False`` and guarded by a lock, because callers such
    as UI event handlers may run on a thread other than the one that created
    the connection; with sqlite3's default setting every such call raises
    ``sqlite3.ProgrammingError`` and the summary would be silently dropped.
    """

    def __init__(self, db_name="chat_history.db"):
        """Open (or create) the database and make sure the table exists.

        Args:
            db_name: Path of the SQLite file.  ``":memory:"`` yields a
                throw-away in-memory database.
        """
        self.conn = sqlite3.connect(db_name, check_same_thread=False)
        # Serializes all access to the shared connection.
        self._lock = threading.Lock()
        self._create_tables()

    def _create_tables(self):
        """Create the ``chat_summaries`` table if it does not exist yet."""
        # ``with self.conn`` commits on success and rolls back on error.
        with self._lock, self.conn:
            self.conn.execute(
                '''CREATE TABLE IF NOT EXISTS chat_summaries
                (id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME,
                summary TEXT,
                model_used TEXT,
                token_count INT)'''
            )

    def save_summary(self, summary_data):
        """Insert one summary row stamped with the current local time.

        Args:
            summary_data: Mapping with the keys ``'summary'``, ``'model'``
                and ``'tokens'``.

        Returns:
            ``True`` when the row was committed, ``False`` when the input was
            malformed or the database rejected the write (the error is
            printed, never raised, so a failed save cannot break a chat turn).
        """
        try:
            row = (
                # Stored as ISO text explicitly; this is the same value the
                # implicit datetime adapter produced, without relying on that
                # adapter (deprecated since Python 3.12).
                datetime.now().isoformat(sep=" "),
                summary_data['summary'],
                summary_data['model'],
                summary_data['tokens'],
            )
            with self._lock, self.conn:
                self.conn.execute(
                    '''INSERT INTO chat_summaries
                    (timestamp, summary, model_used, token_count)
                    VALUES (?, ?, ?, ?)''',
                    row,
                )
        except (sqlite3.Error, KeyError, TypeError) as e:
            print(f"Database error: {str(e)}")
            return False
        return True

    def load_summaries(self, limit=5):
        """Return the most recent summaries, newest first.

        Args:
            limit: Maximum number of rows to read.

        Returns:
            The summaries joined with newlines; an empty string when the
            table is empty.
        """
        with self._lock:
            rows = self.conn.execute(
                "SELECT summary FROM chat_summaries ORDER BY id DESC LIMIT ?",
                (limit,),
            ).fetchall()
        # Skip NULL summaries so ``str.join`` cannot fail on ``None``.
        return "\n".join(summary for (summary,) in rows if summary is not None)

    def close(self):
        """Close the underlying connection."""
        with self._lock:
            self.conn.close()
class AICore:
    """Retrieval-augmented chat backend.

    Owns the embedding model, a Chroma vector store filled from uploaded
    files, the chat model, the running chat history and the summary database.
    The history is stored on the instance, so every caller that shares one
    ``AICore`` also shares one conversation.
    """

    # Languages written in the Arabic script.  Only answers in these languages
    # are passed through ``filter_to_persian``; applying that filter to an
    # English answer would delete every Latin letter.
    _ARABIC_SCRIPT_LANGUAGES = frozenset({"فارسی", "عربی"})

    # Reasoning traces some models wrap in <think>...</think>.
    _THINK_PATTERN = re.compile(r"<think>.*?</think>", flags=re.DOTALL)

    # Everything outside the Arabic Unicode block, whitespace, basic
    # punctuation and ASCII digits.  U+200C (zero-width non-joiner) is kept:
    # Persian spelling relies on it and removing it fuses words together.
    _NON_PERSIAN_PATTERN = re.compile(r'[^\u0600-\u06FF\u200c\s\.,؛؟!٪،0-9]')

    # Tone selector value -> instruction sentence inserted into the prompt.
    _TONE_PROMPTS = {
        "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
        "محاورهای": "پاسخ را به صورت دوستانه ارائه کن.",
        "علمی": "پاسخ را با استدلالهای منطقی ارائه کن.",
        "طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
    }

    # Response-length selector value -> instruction sentence.
    _LENGTH_PROMPTS = {
        "کوتاه": "پاسخ را به صورت مختصر ارائه کن.",
        "بلند": "پاسخ را به صورت مفصل و جامع ارائه کن.",
    }

    # Text returned in place of a summary when summarization is switched off.
    _SUMMARY_DISABLED = "خلاصهسازی غیرفعال است."

    def __init__(self):
        """Load the models and open the summary database.

        Raises:
            RuntimeError: If the ``GROQ_API_KEY`` environment variable is
                missing or empty.
        """
        # The key is read from the environment instead of being embedded in
        # the source.  NOTE: any key that was ever committed to source must
        # be treated as leaked and rotated.
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            raise RuntimeError(
                "GROQ_API_KEY environment variable is not set; "
                "export it (or add it as a secret) before starting the app."
            )
        self.api_key = api_key
        self.embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
        self.vector_store = Chroma(embedding_function=self.embeddings)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.chat_history = []
        # Flat per-token rate used for the cost estimate.
        self.price_per_token = 0.00001
        self.model = ChatGroq(api_key=self.api_key, model_name=default_model)
        self.db = DatabaseManager()
        # Fingerprint -> extracted text of files already added to the vector
        # store, so the same upload is not indexed again on every question.
        self._indexed_files = {}

    def _init_model(self, model_name):
        """Switch the chat model when a different ``model_name`` is requested."""
        if self.model.model_name != model_name:
            self.model = ChatGroq(api_key=self.api_key, model_name=model_name)

    def _format_history(self):
        """Render the chat history as alternating question/answer lines."""
        return "\n".join(f"پرسش: {q}\nپاسخ: {a}" for q, a in self.chat_history)

    def summarize_chat(self):
        """Ask the current chat model for a short summary of the history.

        Returns:
            The ``content`` of the model's reply.
        """
        chat_text = self._format_history()
        summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
        summary_response = self.model.invoke(summary_prompt)
        return summary_response.content

    @staticmethod
    def _file_fingerprint(file_path):
        """Return ``(path, mtime_ns, size)`` for a file, or ``None`` if it cannot be stat'ed."""
        try:
            info = os.stat(file_path)
        except (OSError, TypeError, ValueError):
            return None
        return (file_path, info.st_mtime_ns, info.st_size)

    def process_file(self, file_obj):
        """Extract text from a PDF/TXT file and add it to the vector store.

        A file that was already indexed (same path, modification time and
        size) is not indexed again; its cached text is returned instead.

        Args:
            file_obj: A path string or an object with a ``name`` attribute
                holding the path.  Falsy values are ignored.

        Returns:
            The extracted text, or ``None`` when ``file_obj`` is falsy.

        Raises:
            RuntimeError: If the file has an unsupported extension or cannot
                be read, parsed or indexed.  The original error is chained.
        """
        if not file_obj:
            return None
        file_path = file_obj.name if hasattr(file_obj, "name") else file_obj

        fingerprint = self._file_fingerprint(file_path)
        if fingerprint is not None and fingerprint in self._indexed_files:
            return self._indexed_files[fingerprint]

        try:
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == ".pdf":
                reader = PdfReader(file_path)
                # Guard against pages that yield no text at all.
                file_text = "\n".join((page.extract_text() or "") for page in reader.pages)
            elif file_extension == ".txt":
                with open(file_path, "r", encoding="utf-8") as f:
                    file_text = f.read()
            else:
                raise ValueError(f"Unsupported file format: {file_extension}")
            file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
            file_splits = self.text_splitter.split_documents(file_docs)
            # Nothing to index for an empty document.
            if file_splits:
                self.vector_store.add_documents(file_splits)
        except Exception as e:
            raise RuntimeError(f"Error processing file: {str(e)}") from e

        if fingerprint is not None:
            self._indexed_files[fingerprint] = file_text
        return file_text

    def count_tokens(self, text):
        """Approximate the token count as the number of whitespace-separated words."""
        return len(text.split())

    def calculate_price(self, input_text, output_text):
        """Estimate the cost of one exchange.

        Returns:
            ``(total_tokens, price)`` where ``price`` is a string with six
            decimals followed by the currency word.
        """
        total_tokens = self.count_tokens(input_text) + self.count_tokens(output_text)
        total_price = total_tokens * self.price_per_token
        return total_tokens, f"{total_price:.6f} دلار"

    def remove_think_sections(self, response_text):
        """Delete every ``<think>...</think>`` section from ``response_text``."""
        return self._THINK_PATTERN.sub("", response_text)

    def filter_to_persian(self, text):
        """Keep only Arabic-script characters, whitespace, basic punctuation and digits.

        The zero-width non-joiner (U+200C) is preserved as well.
        """
        return self._NON_PERSIAN_PATTERN.sub("", text)

    def _postprocess(self, response_text, language):
        """Strip reasoning traces and, for Arabic-script languages, foreign characters.

        When ``language`` is empty the script filter is applied too, as a
        conservative default.
        """
        cleaned = self.remove_think_sections(response_text)
        if not language or language in self._ARABIC_SCRIPT_LANGUAGES:
            cleaned = self.filter_to_persian(cleaned)
        return cleaned

    def _build_prompt(self, query, knowledge, tone, language, response_length,
                      welcome_message, exclusion_words):
        """Assemble the full prompt text sent to the chat model.

        Order of the parts: prior conversation (if any), welcome message
        (only on the first turn), persona and style instructions, retrieved
        knowledge, then the question.
        """
        tone_instruction = self._TONE_PROMPTS.get(tone, f"پاسخ را به زبان {language} ارائه کن.")
        language_instruction = (
            f"پاسخ را فقط به زبان {language} ارائه کن و از زبان دیگری استفاده نکن مگر آنکه بخواهی کد بنویسی "
            f"که در آن صورت فقط از زبان انگلیسی استفاده کن مگر اینکه کاربر از تو درخواست کند از زبان دیگری استفاده بکنی و از زبان چینی استفاده نکن."
        ) if language else ""
        length_instruction = self._LENGTH_PROMPTS.get(response_length, "")
        exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""

        prompt = (
            f"شما Parviz Mind هستید، یک دستیار هوش مصنوعی ساخته شده توسط امیرمهدی پرویز دانشجو دانشگاه صنعتی کرمانشاه "
            f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
        )
        if welcome_message and not self.chat_history:
            prompt = f"{welcome_message}\n\n" + prompt
        if self.chat_history:
            prompt = f"{self._format_history()}\n\n" + prompt
        prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
        return prompt

    def answer_query(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Answer one question and record it in the chat history.

        Args:
            query: The user's question.
            file_obj: Optional uploaded file to index before answering.
            summarize: When truthy, summarize the conversation and save it.
            tone: Tone selector value.
            model_name: Chat model to use for this turn.
            creativity: Passed to the model as ``temperature``.
            keywords: Optional text prepended to the retrieval query.
            language: Requested answer language.
            response_length: Length selector value.
            welcome_message: Optional text put in front of the first prompt.
            exclusion_words: Optional words the model is told to avoid.

        Returns:
            ``(answer, summary_text, total_tokens, price)``.
        """
        self._init_model(model_name)
        if file_obj:
            self.process_file(file_obj)

        search_query = f"{keywords} {query}" if keywords else query
        retrieved_docs = self.vector_store.similarity_search(search_query, k=3)
        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)

        # Built before the new turn is appended so the history part contains
        # only earlier exchanges.
        prompt = self._build_prompt(
            query, knowledge, tone, language, response_length,
            welcome_message, exclusion_words,
        )
        response = self.model.invoke(prompt, temperature=creativity)
        cleaned_response = self._postprocess(response.content, language)

        self.chat_history.append((query, cleaned_response))
        total_tokens, price = self.calculate_price(prompt, cleaned_response)

        if summarize:
            summary_text = self.summarize_chat()
            self.db.save_summary({
                'summary': summary_text,
                'model': model_name,
                'tokens': total_tokens,
            })
        else:
            summary_text = self._SUMMARY_DISABLED
        return cleaned_response, summary_text, total_tokens, price

    def clear_history(self):
        """Reset the chat history and return the new, empty list."""
        self.chat_history = []
        return self.chat_history
class ChatInterface:
    """Gradio front-end that forwards user input to an ``AICore`` backend."""

    def __init__(self, ai_core: "AICore"):
        """Store the backend and build the Gradio layout.

        Args:
            ai_core: Backend that answers queries and owns the chat history.
        """
        self.ai = ai_core
        self._create_interface()

    def _create_interface(self):
        """Create all widgets and connect the buttons to their handlers."""
        # NOTE(review): the nesting of the Accordion/Row containers below was
        # reconstructed from an unindented copy of this file; confirm that the
        # grouping of widgets matches the intended layout.
        with gr.Blocks() as self.interface:
            gr.Markdown("## 🤖 Parviz Mind")
            gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")
            self.chatbot = gr.Chatbot(label="💬 تاریخچه چت")
            self.query_input = gr.Textbox(label="❓ سوال خود را وارد کنید")
            self.summarize_checkbox = gr.Checkbox(label="📌 خلاصهساز را فعال کن")
            self.submit_button = gr.Button("🚀 ارسال")
            self.del_button = gr.Button("🗑 پاک کردن حافظه")
            self.file_input = gr.File(label="📂 آپلود فایل", file_types=[".pdf", ".txt"])

            with gr.Accordion("خلاصه چت", open=False):
                with gr.Row():
                    self.summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)

            with gr.Accordion("تنظیمات پیشرفته", open=False):
                with gr.Row():
                    self.model_dropdown = gr.Dropdown(label="🔍 انتخاب مدل", choices=models, value=default_model)
                    self.tone_dropdown = gr.Dropdown(label="🎭 لحن پاسخ", choices=["رسمی", "محاورهای", "علمی", "طنزآمیز"], value="رسمی")
                    self.language_dropdown = gr.Dropdown(label="🌐 زبان چت بات", choices=["فارسی", "انگلیسی", "عربی"], value="فارسی")
                    self.token_count = gr.Textbox(label="🔢 تعداد توکنها", interactive=False)
                    self.token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
                with gr.Row():
                    self.creativity_slider = gr.Slider(label="🎨 خلاقیت (Temperature)", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
                    self.response_length_dropdown = gr.Dropdown(label="📏 طول پاسخ", choices=["کوتاه", "بلند"], value="بلند")
                    self.keywords_input = gr.Textbox(label="🔑 کلمات کلیدی (اختیاری)")
                    self.welcome_message_input = gr.Textbox(label="👋 پیام خوش آمدگویی (اختیاری)")
                    self.exclusion_words_input = gr.Textbox(label="🚫 کلمات استثنا (اختیاری)")

            # One definition of the handler inputs/outputs, shared by the
            # button click and the textbox submit so they cannot drift apart.
            # The input order must match the parameters of ``process_chat``.
            chat_inputs = [
                self.query_input, self.file_input, self.summarize_checkbox,
                self.tone_dropdown, self.model_dropdown, self.creativity_slider,
                self.keywords_input, self.language_dropdown, self.response_length_dropdown,
                self.welcome_message_input, self.exclusion_words_input,
            ]
            chat_outputs = [self.chatbot, self.summary_output, self.token_count, self.token_price]

            self.del_button.click(self.clear_chat, inputs=[], outputs=chat_outputs)
            self.submit_button.click(self.process_chat, inputs=chat_inputs, outputs=chat_outputs)
            self.query_input.submit(self.process_chat, inputs=chat_inputs, outputs=chat_outputs)

    def process_chat(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Run one chat turn and return the values for the output widgets.

        Returns:
            ``(chat_history, summary, total_tokens, price)``.
        """
        # The answer itself is not needed here: the backend has already
        # appended it to ``chat_history``, which is what the chatbot shows.
        _, summary, total_tokens, price = self.ai.answer_query(
            query, file_obj, summarize, tone, model_name, creativity,
            keywords, language, response_length, welcome_message, exclusion_words
        )
        return self.ai.chat_history, summary, total_tokens, price

    def clear_chat(self):
        """Clear the backend history and reset the output widgets."""
        self.ai.clear_history()
        return self.ai.chat_history, "", 0, "0 دلار"

    def launch(self):
        """Start the Gradio app."""
        self.interface.launch()
if __name__ == "__main__":
    # Build the backend once, attach the Gradio UI to it and start serving.
    ai_core = AICore()
    chat_app = ChatInterface(ai_core)
    chat_app.launch()