# Hugging Face Space: GIGAParviz/Parviz_Mind (status: Sleeping)
# File size: 12,834 bytes

import hashlib
import os
import re
import sqlite3
import threading
from datetime import datetime

import gradio as gr
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader

# Chat model identifiers offered in the UI's model dropdown.
models = [
    "deepseek-r1-distill-llama-70b",
    "llama-3.3-70b-versatile",
    "gemma2-9b-it",
]
# The first entry is the model selected on start-up.
default_model = models[0]

class DatabaseManager:
    """Small SQLite wrapper that stores and retrieves chat summaries.

    One connection is opened per instance and shared by all methods. Access
    is serialized with a lock so the instance can be used from threads other
    than the one that created it.
    """

    def __init__(self, db_name="chat_history.db"):
        """Open (or create) the database and make sure the table exists.

        Args:
            db_name: Path of the SQLite database file (``":memory:"`` works
                as well).
        """
        # sqlite3's default check_same_thread=True raises ProgrammingError as
        # soon as the connection is touched from a different thread than the
        # one that opened it. Web UI callbacks may run on worker threads, so
        # the check is disabled and a lock serializes access instead.
        self.conn = sqlite3.connect(db_name, check_same_thread=False)
        self._lock = threading.Lock()
        self._create_tables()

    def _create_tables(self):
        """Create the ``chat_summaries`` table if it is not there yet."""
        # `with self.conn` commits on success and rolls back on error.
        with self._lock, self.conn:
            self.conn.execute(
                '''CREATE TABLE IF NOT EXISTS chat_summaries
                   (id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME,
                    summary TEXT,
                    model_used TEXT,
                    token_count INT)'''
            )

    def save_summary(self, summary_data):
        """Insert one summary row stamped with the current local time.

        Args:
            summary_data: Mapping with the keys ``'summary'``, ``'model'``
                and ``'tokens'``.

        Returns:
            True when the row was written, False when anything went wrong
            (the error is printed, never raised).
        """
        try:
            # Store the timestamp as explicit ISO text instead of relying on
            # sqlite3's implicit datetime adapter (deprecated in Python 3.12).
            # sep=" " keeps the same text layout that adapter produced.
            row = (
                datetime.now().isoformat(sep=" "),
                summary_data['summary'],
                summary_data['model'],
                summary_data['tokens'],
            )
            with self._lock, self.conn:
                self.conn.execute(
                    '''INSERT INTO chat_summaries 
                       (timestamp, summary, model_used, token_count)
                       VALUES (?, ?, ?, ?)''',
                    row,
                )
            return True
        except Exception as e:
            # Saving a summary is best-effort: report the problem and let the
            # caller carry on.
            print(f"Database error: {str(e)}")
            return False

    def load_summaries(self, limit=5):
        """Return the most recent summaries, newest first.

        Args:
            limit: Maximum number of summaries to fetch.

        Returns:
            The summaries joined with newlines; an empty string when the
            table has no rows.
        """
        with self._lock:
            rows = self.conn.execute(
                "SELECT summary FROM chat_summaries ORDER BY id DESC LIMIT ?",
                (limit,)
            ).fetchall()
        return "\n".join(row[0] for row in rows)

class AICore:
    """Chat backend: retrieval over uploaded files plus a Groq chat model.

    Keeps the embedding model, the Chroma vector store, the running chat
    history (a list of ``(question, answer)`` tuples) and a
    ``DatabaseManager`` used to persist conversation summaries.
    """

    # Matches the reasoning section some models emit before their answer.
    _THINK_RE = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
    # Everything that is NOT Arabic-script text, whitespace, basic punctuation
    # or an ASCII digit. U+200C (zero-width non-joiner) is allowed on purpose:
    # Persian spelling depends on it, and removing it fuses words together.
    _NON_PERSIAN_RE = re.compile(r'[^\u0600-\u06FF\u200c\s\.,؛؟!٪،0-9]')
    # UI language choices whose answers are written in Latin script and must
    # therefore not be passed through the Persian-only character filter.
    _LATIN_SCRIPT_LANGUAGES = frozenset({"انگلیسی"})

    def __init__(self, api_key=None):
        """Create the embeddings, vector store, splitter, model and database.

        Args:
            api_key: Groq API key. When omitted, the ``GROQ_API_KEY``
                environment variable is used.

        Raises:
            RuntimeError: If no key is passed and ``GROQ_API_KEY`` is unset.
        """
        # Secrets must not live in source code: a key committed to a
        # repository has to be treated as leaked. Resolve it at runtime and
        # fail early, before the (slow) embedding model is loaded.
        resolved_key = api_key or os.environ.get("GROQ_API_KEY")
        if not resolved_key:
            raise RuntimeError(
                "Groq API key is missing: set the GROQ_API_KEY environment "
                "variable or pass api_key to AICore()."
            )
        self.api_key = resolved_key
        self.embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
        self.vector_store = Chroma(embedding_function=self.embeddings)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.chat_history = []
        # Flat per-token rate used only for the cost estimate shown in the UI.
        self.price_per_token = 0.00001
        # SHA-256 digests of file contents already added to the vector store.
        self._indexed_digests = set()
        self.model = ChatGroq(api_key=self.api_key, model_name=default_model)
        self.db = DatabaseManager()

    def _init_model(self, model_name):
        """Switch to ``model_name`` unless it is already the active model."""
        if self.model.model_name != model_name:
            self.model = ChatGroq(api_key=self.api_key, model_name=model_name)

    def _format_history(self):
        """Render the chat history as alternating question/answer lines."""
        return "\n".join(f"پرسش: {q}\nپاسخ: {a}" for q, a in self.chat_history)

    def summarize_chat(self):
        """Ask the active model for a short summary of the conversation.

        Returns:
            The summary text with any ``<think>`` section removed and
            surrounding whitespace trimmed.
        """
        chat_text = self._format_history()
        summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
        summary_response = self.model.invoke(summary_prompt)
        # Same clean-up as for answers: the reasoning section is not part of
        # the summary and must not be shown or stored.
        return self.remove_think_sections(summary_response.content).strip()

    def process_file(self, file_obj):
        """Read an uploaded PDF/TXT file and index its text for retrieval.

        Content that was already indexed (same text) is not added to the
        vector store a second time, so asking several questions about one
        upload does not fill the store with duplicate chunks.

        Args:
            file_obj: A path string or an object with a ``name`` attribute
                holding the path. Falsy values are ignored.

        Returns:
            The extracted text, or None when ``file_obj`` is falsy.

        Raises:
            RuntimeError: If the file has an unsupported extension or cannot
                be read or indexed; the original error is chained.
        """
        if not file_obj:
            return None
        file_path = file_obj.name if hasattr(file_obj, "name") else file_obj
        file_extension = os.path.splitext(file_path)[1].lower()
        try:
            if file_extension == ".pdf":
                reader = PdfReader(file_path)
                # Treat a page without extractable text as empty so that
                # join() never receives a non-string value.
                file_text = "\n".join((page.extract_text() or "") for page in reader.pages)
            elif file_extension == ".txt":
                with open(file_path, "r", encoding="utf-8") as f:
                    file_text = f.read()
            else:
                raise ValueError(f"Unsupported file format: {file_extension}")
            digest = hashlib.sha256(file_text.encode("utf-8", errors="replace")).hexdigest()
            if digest not in self._indexed_digests:
                file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
                file_splits = self.text_splitter.split_documents(file_docs)
                # An empty file produces no chunks; skip the store call then.
                if file_splits:
                    self.vector_store.add_documents(file_splits)
                self._indexed_digests.add(digest)
            return file_text
        except Exception as e:
            raise RuntimeError(f"Error processing file: {str(e)}") from e

    def count_tokens(self, text):
        """Approximate the token count as the number of whitespace-separated words."""
        return len(text.split())

    def calculate_price(self, input_text, output_text):
        """Estimate usage for one exchange.

        Returns:
            A tuple ``(total_tokens, price)`` where ``price`` is a string
            with six decimals followed by the currency word.
        """
        input_tokens = self.count_tokens(input_text)
        output_tokens = self.count_tokens(output_text)
        total_tokens = input_tokens + output_tokens
        total_price = total_tokens * self.price_per_token
        return total_tokens, f"{total_price:.6f} دلار"

    def remove_think_sections(self, response_text):
        """Remove every ``<think>...</think>`` section from ``response_text``."""
        return self._THINK_RE.sub("", response_text)

    def filter_to_persian(self, text):
        """Drop characters outside the allowed Persian/Arabic character set.

        Kept: Arabic-script characters (U+0600-U+06FF), the zero-width
        non-joiner, whitespace, ``. , ! ؛ ؟ ٪ ،`` and ASCII digits.
        """
        return self._NON_PERSIAN_RE.sub('', text)

    def answer_query(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Answer ``query`` using retrieved file context and the chat history.

        Args:
            query: The user's question.
            file_obj: Optional uploaded file to index before answering.
            summarize: When truthy, also summarize the chat and store it.
            tone: Tone choice; selects one of the predefined instructions.
            model_name: Identifier of the chat model to use.
            creativity: Sampling temperature passed to the model.
            keywords: Optional extra terms prepended to the retrieval query.
            language: Answer language choice from the UI.
            response_length: Length choice ("کوتاه" or "بلند"); anything
                else adds no length instruction.
            welcome_message: Optional text placed before the prompt on the
                first turn only.
            exclusion_words: Optional words the model is told to avoid.

        Returns:
            ``(answer, summary_text, total_tokens, price)``.
        """
        self._init_model(model_name)
        if file_obj:
            self.process_file(file_obj)
        search_query = f"{keywords} {query}" if keywords else query
        retrieved_docs = self.vector_store.similarity_search(search_query, k=3)
        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
        tone_prompts = {
            "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
            "محاوره‌ای": "پاسخ را به صورت دوستانه ارائه کن.",
            "علمی": "پاسخ را با استدلال‌های منطقی ارائه کن.",
            "طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
        }
        tone_instruction = tone_prompts.get(tone, f"پاسخ را به زبان {language} ارائه کن.")
        language_instruction = (f"پاسخ را فقط به زبان {language} ارائه کن و از زبان دیگری استفاده نکن مگر آنکه بخواهی کد بنویسی "
                                f"که در آن صورت فقط از زبان انگلیسی استفاده کن مگر اینکه کاربر از تو درخواست کند از زبان دیگری استفاده بکنی و از زبان چینی استفاده نکن.") if language else ""
        if response_length == "کوتاه":
            length_instruction = "پاسخ را به صورت مختصر ارائه کن."
        elif response_length == "بلند":
            length_instruction = "پاسخ را به صورت مفصل و جامع ارائه کن."
        else:
            length_instruction = ""
        exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""
        prompt = (
            f"شما Parviz Mind هستید، یک دستیار هوش مصنوعی ساخته شده توسط امیرمهدی پرویز دانشجو دانشگاه صنعتی کرمانشاه "
            f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
        )
        if welcome_message and not self.chat_history:
            prompt = f"{welcome_message}\n\n" + prompt
        if self.chat_history:
            prompt = f"{self._format_history()}\n\n" + prompt
        prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
        response = self.model.invoke(prompt, temperature=creativity)
        cleaned_response = self.remove_think_sections(response.content)
        # The character filter only makes sense for Arabic-script answers;
        # applied to an English answer it would delete every letter.
        if language not in self._LATIN_SCRIPT_LANGUAGES:
            cleaned_response = self.filter_to_persian(cleaned_response)
        self.chat_history.append((query, cleaned_response))
        total_tokens, price = self.calculate_price(prompt, cleaned_response)
        if summarize:
            summary_text = self.summarize_chat()
            self.db.save_summary({
                'summary': summary_text,
                'model': model_name,
                'tokens': total_tokens
            })
        else:
            summary_text = "خلاصه‌سازی غیرفعال است."
        return cleaned_response, summary_text, total_tokens, price

    def clear_history(self):
        """Forget the conversation so far and return the new empty history."""
        self.chat_history = []
        return self.chat_history

class ChatInterface:
    """Gradio front end that connects the UI widgets to an ``AICore`` backend."""

    def __init__(self, ai_core: AICore):
        """Keep a reference to the backend and build the Blocks layout."""
        self.ai = ai_core
        self._create_interface()

    def _create_interface(self):
        """Create all widgets and register the button/textbox callbacks."""
        self.interface = gr.Blocks()
        with self.interface:
            gr.Markdown("## 🤖 Parviz Mind")
            gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")

            # Main conversation area and primary controls.
            self.chatbot = gr.Chatbot(label="💬 تاریخچه چت")
            self.query_input = gr.Textbox(label="❓ سوال خود را وارد کنید")
            self.summarize_checkbox = gr.Checkbox(label="📌 خلاصه‌ساز را فعال کن")
            self.submit_button = gr.Button("🚀 ارسال")
            self.del_button = gr.Button("🗑 پاک کردن حافظه")
            self.file_input = gr.File(label="📂 آپلود فایل", file_types=[".pdf", ".txt"])

            # Collapsible panel showing the conversation summary.
            with gr.Accordion("خلاصه چت", open=False):
                with gr.Row():
                    self.summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)

            # Collapsible panel with model and answer settings.
            with gr.Accordion("تنظیمات پیشرفته", open=False):
                with gr.Row():
                    self.model_dropdown = gr.Dropdown(label="🔍 انتخاب مدل", choices=models, value=default_model)
                    self.tone_dropdown = gr.Dropdown(label="🎭 لحن پاسخ", choices=["رسمی", "محاوره‌ای", "علمی", "طنزآمیز"], value="رسمی")
                    self.language_dropdown = gr.Dropdown(label="🌐 زبان چت بات", choices=["فارسی", "انگلیسی", "عربی"], value="فارسی")
                    self.token_count = gr.Textbox(label="🔢 تعداد توکن‌ها", interactive=False)
                    self.token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
                with gr.Row():
                    self.creativity_slider = gr.Slider(label="🎨 خلاقیت (Temperature)", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
                    self.response_length_dropdown = gr.Dropdown(label="📏 طول پاسخ", choices=["کوتاه", "بلند"], value="بلند")
                self.keywords_input = gr.Textbox(label="🔑 کلمات کلیدی (اختیاری)")
                self.welcome_message_input = gr.Textbox(label="👋 پیام خوش آمدگویی (اختیاری)")
                self.exclusion_words_input = gr.Textbox(label="🚫 کلمات استثنا (اختیاری)")

            # Every callback updates the same four widgets.
            result_widgets = (
                self.chatbot,
                self.summary_output,
                self.token_count,
                self.token_price,
            )
            # Widget order mirrors the parameter order of process_chat.
            question_widgets = (
                self.query_input,
                self.file_input,
                self.summarize_checkbox,
                self.tone_dropdown,
                self.model_dropdown,
                self.creativity_slider,
                self.keywords_input,
                self.language_dropdown,
                self.response_length_dropdown,
                self.welcome_message_input,
                self.exclusion_words_input,
            )

            self.del_button.click(
                self.clear_chat,
                inputs=[],
                outputs=list(result_widgets),
            )
            # The send button and pressing Enter in the textbox do the same thing.
            for register in (self.submit_button.click, self.query_input.submit):
                register(
                    self.process_chat,
                    inputs=list(question_widgets),
                    outputs=list(result_widgets),
                )

    def process_chat(self, query, file_obj, summarize, tone, model_name, creativity,
                     keywords, language, response_length, welcome_message, exclusion_words):
        """Forward one question to the backend and return the widget updates.

        Returns:
            ``(chat_history, summary, total_tokens, price)``; the answer
            itself reaches the UI through the updated chat history.
        """
        _answer, summary, total_tokens, price = self.ai.answer_query(
            query,
            file_obj,
            summarize,
            tone,
            model_name,
            creativity,
            keywords,
            language,
            response_length,
            welcome_message,
            exclusion_words,
        )
        return self.ai.chat_history, summary, total_tokens, price

    def clear_chat(self):
        """Reset the backend history and blank the output widgets."""
        self.ai.clear_history()
        emptied_history = self.ai.chat_history
        return emptied_history, "", 0, "0 دلار"

    def launch(self):
        """Start the Gradio app."""
        self.interface.launch()

if __name__ == "__main__":
    # Script entry point: build the backend, attach the UI to it, serve it.
    ai_core = AICore()
    chat_app = ChatInterface(ai_core=ai_core)
    chat_app.launch()