Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import os
|
2 |
import re
|
|
|
|
|
3 |
from pypdf import PdfReader
|
4 |
import gradio as gr
|
5 |
from langchain_groq import ChatGroq
|
@@ -8,184 +10,220 @@ from langchain.vectorstores import Chroma
|
|
8 |
from langchain_core.documents import Document
|
9 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
10 |
|
11 |
-
embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
|
12 |
-
vector_store = Chroma(embedding_function=embeddings)
|
13 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
|
14 |
-
|
15 |
models = ["deepseek-r1-distill-llama-70b", "llama-3.3-70b-versatile", "gemma2-9b-it"]
|
16 |
default_model = models[0]
|
17 |
-
model = ChatGroq(api_key="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", model_name=default_model)
|
18 |
-
|
19 |
-
chat_history = []
|
20 |
-
PRICE_PER_TOKEN = 0.00001
|
21 |
-
|
22 |
-
def summarize_chat(model):
|
23 |
-
|
24 |
-
chat_text = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history])
|
25 |
-
summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
|
26 |
-
summary_response = model.invoke(summary_prompt)
|
27 |
-
return summary_response.content
|
28 |
-
|
29 |
-
def process_file(file_path):
|
30 |
-
"""Process file and store in ChromaDB."""
|
31 |
-
if not file_path:
|
32 |
-
return None
|
33 |
-
file_extension = os.path.splitext(file_path)[1].lower()
|
34 |
-
try:
|
35 |
-
if file_extension == ".pdf":
|
36 |
-
|
37 |
-
reader = PdfReader(file_path)
|
38 |
-
file_text = "\n".join(page.extract_text() for page in reader.pages)
|
39 |
-
elif file_extension == ".txt":
|
40 |
-
with open(file_path, "r", encoding="utf-8") as f:
|
41 |
-
file_text = f.read()
|
42 |
-
else:
|
43 |
-
raise ValueError(f"Unsupported file format: {file_extension}")
|
44 |
-
|
45 |
-
file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
|
46 |
-
file_splits = text_splitter.split_documents(file_docs)
|
47 |
-
vector_store.add_documents(file_splits)
|
48 |
-
return file_text
|
49 |
-
except Exception as e:
|
50 |
-
raise RuntimeError(f"Error processing file: {str(e)}")
|
51 |
-
|
52 |
-
|
53 |
-
def answer_query(query, file_path, summarize, tone, model_name, creativity, keywords, language, response_length, welcome_message, exclusion_words):
|
54 |
-
global chat_history
|
55 |
-
|
56 |
-
model = ChatGroq(api_key="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", model_name=model_name)
|
57 |
-
try:
|
58 |
-
|
59 |
-
if file_path:
|
60 |
-
process_file(file_path)
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
search_query = f"{keywords} {query}" if keywords else query
|
63 |
-
retrieved_docs = vector_store.similarity_search(search_query, k=3)
|
64 |
knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
|
65 |
-
|
66 |
tone_prompts = {
|
67 |
"رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
|
68 |
"محاورهای": "پاسخ را به صورت دوستانه ارائه کن.",
|
69 |
"علمی": "پاسخ را با استدلالهای منطقی ارائه کن.",
|
70 |
"طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
|
71 |
}
|
72 |
-
tone_instruction = tone_prompts.get(tone,
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
if response_length == "کوتاه":
|
77 |
length_instruction = "پاسخ را به صورت مختصر ارائه کن."
|
78 |
elif response_length == "بلند":
|
79 |
length_instruction = "پاسخ را به صورت مفصل و جامع ارائه کن."
|
80 |
else:
|
81 |
length_instruction = ""
|
82 |
-
|
83 |
exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""
|
84 |
-
|
85 |
prompt = (
|
86 |
-
f"شما
|
87 |
f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
|
88 |
)
|
89 |
-
|
90 |
-
if welcome_message and not chat_history:
|
91 |
prompt = f"{welcome_message}\n\n" + prompt
|
92 |
-
|
93 |
-
|
94 |
-
conversation_history = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history])
|
95 |
prompt = f"{conversation_history}\n\n" + prompt
|
96 |
-
|
97 |
prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
|
98 |
-
|
99 |
-
|
100 |
-
cleaned_response =
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
def
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
],
|
188 |
-
outputs=[chatbot, summary_output, token_count, token_price]
|
189 |
-
)
|
190 |
-
|
191 |
-
demo.launch()
|
|
|
1 |
import os
|
2 |
import re
|
3 |
+
import sqlite3
|
4 |
+
from datetime import datetime
|
5 |
from pypdf import PdfReader
|
6 |
import gradio as gr
|
7 |
from langchain_groq import ChatGroq
|
|
|
10 |
from langchain_core.documents import Document
|
11 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
12 |
|
|
|
|
|
|
|
|
|
13 |
# Groq model identifiers offered in the UI dropdown; the first entry is the default.
models = ["deepseek-r1-distill-llama-70b", "llama-3.3-70b-versatile", "gemma2-9b-it"]
default_model = models[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
class DatabaseManager:
    """Persists chat summaries in a local SQLite database."""

    def __init__(self, db_name="chat_history.db"):
        # NOTE(review): check_same_thread=False because Gradio appears to invoke
        # callbacks from worker threads other than the one that opened the
        # connection; the default setting would raise ProgrammingError there.
        self.conn = sqlite3.connect(db_name, check_same_thread=False)
        self._create_tables()

    def _create_tables(self):
        """Create the chat_summaries table if it does not exist yet."""
        cursor = self.conn.cursor()
        cursor.execute(
            '''CREATE TABLE IF NOT EXISTS chat_summaries
                   (id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME,
                    summary TEXT,
                    model_used TEXT,
                    token_count INT)'''
        )
        self.conn.commit()

    def save_summary(self, summary_data):
        """Insert one summary record.

        summary_data: dict with keys 'summary', 'model', 'tokens'.
        Returns True on success, False on any database error (best-effort:
        persistence failures must not crash the chat flow).
        """
        try:
            cursor = self.conn.cursor()
            cursor.execute(
                '''INSERT INTO chat_summaries
                       (timestamp, summary, model_used, token_count)
                   VALUES (?, ?, ?, ?)''',
                (datetime.now(),
                 summary_data['summary'],
                 summary_data['model'],
                 summary_data['tokens'])
            )
            self.conn.commit()
            return True
        except Exception as e:
            # Deliberate swallow: log and report failure instead of raising.
            print(f"Database error: {str(e)}")
            return False

    def load_summaries(self, limit=5):
        """Return the `limit` most recent summaries, newest first, one per line."""
        cursor = self.conn.cursor()
        cursor.execute(
            "SELECT summary FROM chat_summaries ORDER BY id DESC LIMIT ?",
            (limit,)
        )
        return "\n".join(row[0] for row in cursor.fetchall())

    def close(self):
        """Close the underlying SQLite connection (new, backward-compatible)."""
        self.conn.close()
|
56 |
+
|
57 |
+
class AICore:
    """Backend engine: embeddings, vector store, Groq LLM calls, and chat state."""

    def __init__(self):
        self.embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
        self.vector_store = Chroma(embedding_function=self.embeddings)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.chat_history = []           # list of (question, answer) tuples
        self.price_per_token = 0.00001   # flat per-token price estimate
        # SECURITY: read the key from the environment instead of hard-coding it.
        # NOTE(review): the fallback key below was committed to source control and
        # must be rotated; it remains only for backward compatibility until then.
        self.api_key = os.environ.get(
            "GROQ_API_KEY",
            "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",
        )
        self.model = ChatGroq(api_key=self.api_key, model_name=default_model)
        self.db = DatabaseManager()

    def _init_model(self, model_name):
        """Swap the ChatGroq client only when a different model is requested."""
        if self.model.model_name != model_name:
            self.model = ChatGroq(api_key=self.api_key, model_name=model_name)

    def summarize_chat(self):
        """Ask the current model for a short Persian summary of the chat so far."""
        chat_text = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in self.chat_history])
        summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
        summary_response = self.model.invoke(summary_prompt)
        return summary_response.content

    def process_file(self, file_obj):
        """Extract text from an uploaded PDF/TXT file and index it in the vector store.

        Returns the raw text, or None when no file was given.
        Raises RuntimeError (chained to the cause) on extraction/indexing failure.
        """
        if not file_obj:
            return None
        # Gradio may hand us a tempfile-like object or a plain path string.
        file_path = file_obj.name if hasattr(file_obj, "name") else file_obj
        file_extension = os.path.splitext(file_path)[1].lower()
        try:
            if file_extension == ".pdf":
                reader = PdfReader(file_path)
                file_text = "\n".join(page.extract_text() for page in reader.pages)
            elif file_extension == ".txt":
                with open(file_path, "r", encoding="utf-8") as f:
                    file_text = f.read()
            else:
                raise ValueError(f"Unsupported file format: {file_extension}")
            file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
            file_splits = self.text_splitter.split_documents(file_docs)
            self.vector_store.add_documents(file_splits)
            return file_text
        except Exception as e:
            raise RuntimeError(f"Error processing file: {str(e)}") from e

    def count_tokens(self, text):
        """Rough token count: whitespace-separated words (approximation only)."""
        return len(text.split())

    def calculate_price(self, input_text, output_text):
        """Return (total_tokens, formatted price string) for a prompt/response pair."""
        input_tokens = self.count_tokens(input_text)
        output_tokens = self.count_tokens(output_text)
        total_tokens = input_tokens + output_tokens
        total_price = total_tokens * self.price_per_token
        return total_tokens, f"{total_price:.6f} دلار"

    def remove_think_sections(self, response_text):
        """Strip <think>...</think> reasoning blocks emitted by some models."""
        return re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL)

    def filter_to_persian(self, text):
        """Drop characters outside Arabic script, whitespace, digits, and basic punctuation."""
        return re.sub(r'[^\u0600-\u06FF\s\.,؛؟!٪،0-9]', '', text)

    def _build_prompt(self, query, knowledge, tone, language, response_length,
                      welcome_message, exclusion_words):
        """Assemble the full LLM prompt: persona, style instructions, history, context."""
        tone_prompts = {
            "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
            "محاورهای": "پاسخ را به صورت دوستانه ارائه کن.",
            "علمی": "پاسخ را با استدلالهای منطقی ارائه کن.",
            "طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
        }
        tone_instruction = tone_prompts.get(tone, f"پاسخ را به زبان {language} ارائه کن.")
        language_instruction = (f"پاسخ را فقط به زبان {language} ارائه کن و از زبان دیگری استفاده نکن مگر آنکه بخواهی کد بنویسی "
                                f"که در آن صورت فقط از زبان انگلیسی استفاده کن مگر اینکه کاربر از تو درخواست کند از زبان دیگری استفاده بکنی و از زبان چینی استفاده نکن.") if language else ""
        if response_length == "کوتاه":
            length_instruction = "پاسخ را به صورت مختصر ارائه کن."
        elif response_length == "بلند":
            length_instruction = "پاسخ را به صورت مفصل و جامع ارائه کن."
        else:
            length_instruction = ""
        exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""
        prompt = (
            f"شما Parviz Mind هستید، یک دستیار هوش مصنوعی ساخته شده توسط امیرمهدی پرویز دانشجو دانشگاه صنعتی کرمانشاه "
            f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
        )
        # Welcome message only applies to the very first turn.
        if welcome_message and not self.chat_history:
            prompt = f"{welcome_message}\n\n" + prompt
        if self.chat_history:
            conversation_history = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in self.chat_history])
            prompt = f"{conversation_history}\n\n" + prompt
        prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
        return prompt

    def answer_query(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Run one chat turn: index any file, retrieve context, query the LLM.

        Returns (cleaned_response, summary_text, total_tokens, price_string).
        """
        self._init_model(model_name)
        if file_obj:
            self.process_file(file_obj)
        search_query = f"{keywords} {query}" if keywords else query
        retrieved_docs = self.vector_store.similarity_search(search_query, k=3)
        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
        prompt = self._build_prompt(query, knowledge, tone, language, response_length,
                                    welcome_message, exclusion_words)
        response = self.model.invoke(prompt, temperature=creativity)
        cleaned_response = self.remove_think_sections(response.content)
        cleaned_response = self.filter_to_persian(cleaned_response)
        self.chat_history.append((query, cleaned_response))
        total_tokens, price = self.calculate_price(prompt, cleaned_response)
        summary_text = self.summarize_chat() if summarize else "خلاصهسازی غیرفعال است."
        if summarize and summary_text != "خلاصهسازی غیرفعال است.":
            self.db.save_summary({
                'summary': summary_text,
                'model': model_name,
                'tokens': total_tokens
            })
        return cleaned_response, summary_text, total_tokens, price

    def clear_history(self):
        """Forget the conversation; returns the (now empty) history list."""
        self.chat_history = []
        return self.chat_history
|
157 |
+
|
158 |
+
class ChatInterface:
    """Gradio front-end for Parviz Mind: builds the UI and wires events to AICore."""

    def __init__(self, ai_core: AICore):
        self.ai = ai_core
        self._create_interface()

    def _create_interface(self):
        """Build the Gradio Blocks layout and register the event handlers."""
        with gr.Blocks() as self.interface:
            gr.Markdown("## 🤖 Parviz Mind")
            gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")
            self.chatbot = gr.Chatbot(label="💬 تاریخچه چت")
            self.query_input = gr.Textbox(label="❓ سوال خود را وارد کنید")
            self.summarize_checkbox = gr.Checkbox(label="📌 خلاصهساز را فعال کن")
            self.submit_button = gr.Button("🚀 ارسال")
            self.del_button = gr.Button("🗑 پاک کردن حافظه")
            self.file_input = gr.File(label="📂 آپلود فایل", file_types=[".pdf", ".txt"])
            with gr.Accordion("خلاصه چت", open=False):
                with gr.Row():
                    self.summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)
            with gr.Accordion("تنظیمات پیشرفته", open=False):
                with gr.Row():
                    self.model_dropdown = gr.Dropdown(label="🔍 انتخاب مدل", choices=models, value=default_model)
                    self.tone_dropdown = gr.Dropdown(label="🎭 لحن پاسخ", choices=["رسمی", "محاورهای", "علمی", "طنزآمیز"], value="رسمی")
                    self.language_dropdown = gr.Dropdown(label="🌐 زبان چت بات", choices=["فارسی", "انگلیسی", "عربی"], value="فارسی")
                    self.token_count = gr.Textbox(label="🔢 تعداد توکنها", interactive=False)
                    self.token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
                with gr.Row():
                    self.creativity_slider = gr.Slider(label="🎨 خلاقیت (Temperature)", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
                    self.response_length_dropdown = gr.Dropdown(label="📏 طول پاسخ", choices=["کوتاه", "بلند"], value="بلند")
                self.keywords_input = gr.Textbox(label="🔑 کلمات کلیدی (اختیاری)")
                self.welcome_message_input = gr.Textbox(label="👋 پیام خوش آمدگویی (اختیاری)")
                self.exclusion_words_input = gr.Textbox(label="🚫 کلمات استثنا (اختیاری)")
            # Shared wiring lists: the same inputs/outputs were previously
            # duplicated verbatim across all three event registrations.
            chat_inputs = [
                self.query_input, self.file_input, self.summarize_checkbox,
                self.tone_dropdown, self.model_dropdown, self.creativity_slider,
                self.keywords_input, self.language_dropdown, self.response_length_dropdown,
                self.welcome_message_input, self.exclusion_words_input
            ]
            chat_outputs = [self.chatbot, self.summary_output, self.token_count, self.token_price]
            self.del_button.click(
                self.clear_chat,
                inputs=[],
                outputs=chat_outputs
            )
            self.submit_button.click(
                self.process_chat,
                inputs=chat_inputs,
                outputs=chat_outputs
            )
            # Pressing Enter in the textbox behaves like the submit button.
            self.query_input.submit(
                self.process_chat,
                inputs=chat_inputs,
                outputs=chat_outputs
            )

    def process_chat(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Delegate one turn to AICore and shape the result for the Gradio outputs."""
        response, summary, total_tokens, price = self.ai.answer_query(
            query, file_obj, summarize, tone, model_name, creativity,
            keywords, language, response_length, welcome_message, exclusion_words
        )
        return self.ai.chat_history, summary, total_tokens, price

    def clear_chat(self):
        """Reset the conversation and blank out the summary/token displays."""
        self.ai.clear_history()
        return self.ai.chat_history, "", 0, "0 دلار"

    def launch(self):
        """Start the Gradio server."""
        self.interface.launch()
|
225 |
+
|
226 |
+
if __name__ == "__main__":
    # Build the AI backend, attach the Gradio UI, and start serving.
    app = ChatInterface(AICore())
    app.launch()
|
|
|
|
|
|
|
|
|
|