Spaces:

GIGAParviz
/

Parviz_Mind

Sleeping

App Files Files

GIGAParviz committed on Feb 10

Commit

478254d

verified ·

1 Parent(s): 769d3a7

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -75

app.py CHANGED Viewed

@@ -1,146 +1,187 @@
 import os
 import re
 import gradio as gr
 from langchain_groq import ChatGroq
 from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_core.vectorstores import InMemoryVectorStore
 from langchain_core.documents import Document
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
-# Store that receives the embedded chunks of uploaded files.
-vector_store = InMemoryVectorStore(embeddings)
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-# The Groq API key is read from the GROQ_API_KEY environment variable;
-# a credential must never be committed to the repository.
-model = ChatGroq(api_key=os.environ.get("GROQ_API_KEY"), model_name="deepseek-r1-distill-llama-70b")
-# (question, answer) pairs of the running conversation.
-chat_history = []
-# Flat rate applied to the word-count token estimate.
-PRICE_PER_TOKEN = 0.00001
 def count_tokens(text):
-    """Estimate the number of tokens in the text.
-
-    The estimate is the number of whitespace-separated words in ``text``.
-    """
     return len(text.split())
 def calculate_price(input_text, output_text):
-    """Compute the cost based on the number of tokens.
-
-    Returns a tuple of the combined token estimate for both texts and the
-    price (tokens * PRICE_PER_TOKEN) formatted to six decimals followed by
-    a currency suffix.
-    """
     input_tokens = count_tokens(input_text)
     output_tokens = count_tokens(output_text)
     total_tokens = input_tokens + output_tokens
     total_price = total_tokens * PRICE_PER_TOKEN
-    return total_tokens, f"{total_price:.6f} دلار"
 def process_file(file_path):
-    """Process the file and return its content.
-
-    Returns None when ``file_path`` is falsy. ``.pdf`` files are read with
-    PdfReader (page texts joined by newlines) and ``.txt`` files are read as
-    UTF-8. Any failure, including the ValueError raised for other
-    extensions, is re-raised as RuntimeError.
-    """
     if not file_path:
         return None
     file_extension = os.path.splitext(file_path)[1].lower()
     try:
         if file_extension == ".pdf":
-            from pypdf import PdfReader
             reader = PdfReader(file_path)
-            return "\n".join(page.extract_text() for page in reader.pages)
         elif file_extension == ".txt":
             with open(file_path, "r", encoding="utf-8") as f:
-                return f.read()
         else:
-            raise ValueError(f"فرمت فایل پشتیبانی نمی‌شود: {file_extension}")
-    except Exception as e:
-        raise RuntimeError(f"خطا در پردازش فایل: {str(e)}")
 def remove_think_sections(response_text):
-    """Remove the sections that start with <think> and end with </think>.
-
-    re.DOTALL lets the non-greedy match span line breaks inside a section.
-    """
-    cleaned_text = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL)
-    return cleaned_text
-def answer_query(query, file_path, summarize, tone):
-    """Answer the user's question, applying the chosen tone and computing the token cost.
-
-    Returns a 4-tuple: the cleaned model response, the conversation summary
-    (or a fixed "disabled" message when ``summarize`` is falsy), the token
-    estimate and the formatted price. Any exception is caught and reported
-    in the first element, with empty/zero placeholders for the rest.
-    """
     global chat_history
     try:
-        # Index the uploaded file (if any) before retrieving context for the query.
-        file_content = process_file(file_path) if file_path else None
-        if file_content:
-            file_docs = [Document(page_content=file_content, metadata={"source": "uploaded_file"})]
-            file_splits = text_splitter.split_documents(file_docs)
-            vector_store.add_documents(file_splits)
-        retrieved_docs = vector_store.similarity_search(query, k=2)
         knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
         tone_prompts = {
             "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
-            "محاوره‌ای": "پاسخ را به صورت دوستانه و غیررسمی ارائه کن.",
-            "علمی": "پاسخ را با ذکر منابع علمی و استدلال‌های منطقی ارائه کن.",
-            "طنزآمیز": "پاسخ را با لحنی طنزآمیز و سرگرم‌کننده ارائه کن.",
         }
-        # Unknown tones fall back to a generic "answer in Persian" instruction.
-        tone_instruction = tone_prompts.get(tone, "پاسخ را به زبان فارسی ارائه کن.")
         prompt = (
-            f"شما ParvizGPT هستید، یک دستیار هوش مصنوعی که توسط امیر مهدی پرویز ساخته شده است. "
-            f"همیشه به فارسی پاسخ دهید. {tone_instruction} "
-            f"\n\nاطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
         )
-        response = model.invoke(prompt)
-        response_text = response.content
-        cleaned_response = remove_think_sections(response_text)
-        chat_history.append((query, cleaned_response))
-        total_tokens, price = calculate_price(prompt, cleaned_response)
-        summary = summarize_chat() if summarize else "خلاصه‌سازی غیرفعال است."
-        return cleaned_response, summary, total_tokens, price
     except Exception as e:
         return f"خطا: {str(e)}", "", 0, "0 دلار"
-def summarize_chat():
-    """Summarize the recent conversation.
-
-    Renders every (question, answer) pair in chat_history into one prompt,
-    sends it to the module-level model and returns the reply's content.
-    """
-    chat_text = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history])
-    summary_prompt = f"یک خلاصه کوتاه و دقیق از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
-    summary_response = model.invoke(summary_prompt)
-    return summary_response.content
-def chat_with_bot(query, file, summarize, tone):
-    """Gradio interface for the chat.
-
-    Resolves the uploaded file to a path via its ``name`` attribute (None
-    when no file is given) and forwards everything to answer_query.
-    """
     file_path = file.name if file else None
-    response, summary, total_tokens, price = answer_query(query, file_path, summarize, tone)
-    return response, summary, total_tokens, price
 with gr.Blocks() as demo:
-    # Layout: answer/summary/question column, then token statistics, then the file upload.
-    gr.Markdown("## 🤖 Parviz GPT")
     gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")
-    with gr.Column():
-        chat_output = gr.Textbox(label="📝 تاریخچه چت", interactive=False, lines=10)
-        summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)
-        query_input = gr.Textbox(label="❓ سوال خود را وارد کنید", placeholder="مثلاً: کی تو را ساخته است؟")
-        with gr.Row():
-            summarize_checkbox = gr.Checkbox(label="📌 خلاصه‌ساز را فعال کن")
-            submit_button = gr.Button("🚀 ارسال")
-            tone_dropdown = gr.Dropdown(label="🎭 انتخاب لحن پاسخ", choices=["رسمی", "محاوره‌ای", "علمی", "طنزآمیز"], value="رسمی")
     with gr.Row():
-        token_count = gr.Textbox(label="🔢 تعداد توکن‌ها", interactive=False)
-        token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
     with gr.Row():
-        file_input = gr.File(label="📂 فایل خود را آپلود کنید", file_types=[".pdf", ".txt"])
-    # Pressing Enter in the question box and clicking the button run the same handler.
-    query_input.submit(fn=chat_with_bot,
-                        inputs=[query_input, file_input, summarize_checkbox, tone_dropdown],
-                        outputs=[chat_output, summary_output, token_count, token_price])
-    submit_button.click(fn=chat_with_bot,
-                        inputs=[query_input, file_input, summarize_checkbox, tone_dropdown],
-                        outputs=[chat_output, summary_output, token_count, token_price])
 demo.launch()

 import os
 import re
+from pypdf import PdfReader
 import gradio as gr
 from langchain_groq import ChatGroq
 from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.vectorstores import Chroma
 from langchain_core.documents import Document
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
+# Chroma store that receives the embedded chunks of uploaded files
+# (no persist directory is configured here).
+vector_store = Chroma(embedding_function=embeddings)
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+# Groq-hosted models offered in the UI; the first entry is the default.
+models = ["deepseek-r1-distill-llama-70b", "llama-3.3-70b-versatile", "gemma2-9b-it"]
+default_model = models[0]
+# The Groq API key is read from the GROQ_API_KEY environment variable
+# (e.g. a Space secret); a credential must never be committed to the repository.
+model = ChatGroq(api_key=os.environ.get("GROQ_API_KEY"), model_name=default_model)
+# (question, answer) pairs of the running conversation, shared by all callbacks.
+chat_history = []
+# Flat rate applied to the word-count token estimate.
+PRICE_PER_TOKEN = 0.00001
 def count_tokens(text):
+    """Approximate the token count of ``text`` by counting whitespace-separated words."""
+    words = text.split()
+    return len(words)
 def calculate_price(input_text, output_text):
+    """Estimate the cost of one exchange.
+
+    Returns ``(token_total, price)`` where ``token_total`` is the combined
+    word-count estimate of both texts and ``price`` is that total times
+    PRICE_PER_TOKEN, formatted to six decimals with the currency suffix.
+    """
+    token_total = count_tokens(input_text) + count_tokens(output_text)
+    cost = token_total * PRICE_PER_TOKEN
+    return token_total, f"{cost:.6f} هزار تومان"
 def process_file(file_path):
+    """Read an uploaded PDF or TXT file, index its chunks, and return its text.
+
+    The text is split with ``text_splitter`` and the chunks are added to
+    ``vector_store`` tagged with ``source="uploaded_file"``.
+
+    Returns None when ``file_path`` is falsy, otherwise the extracted text.
+    Raises RuntimeError (chained to the original exception) on any failure,
+    including an unsupported file extension.
+    """
     if not file_path:
         return None
     file_extension = os.path.splitext(file_path)[1].lower()
     try:
         if file_extension == ".pdf":
             reader = PdfReader(file_path)
+            # Defensive: treat a page without extractable text as "" so join() cannot fail.
+            file_text = "\n".join(page.extract_text() or "" for page in reader.pages)
         elif file_extension == ".txt":
             with open(file_path, "r", encoding="utf-8") as f:
+                file_text = f.read()
         else:
+            raise ValueError(f"Unsupported file format: {file_extension}")
+        file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
+        file_splits = text_splitter.split_documents(file_docs)
+        # An empty file yields no chunks; do not hand an empty batch to the store.
+        if file_splits:
+            vector_store.add_documents(file_splits)
+        # NOTE(review): callers that pass the same path on every query will index
+        # the file again each time - confirm whether de-duplication is wanted.
+        return file_text
+    except Exception as e:
+        # Chain the cause so the original traceback is preserved for debugging.
+        raise RuntimeError(f"Error processing file: {str(e)}") from e
 def remove_think_sections(response_text):
+    """Strip every ``<think>...</think>`` span, including multi-line ones, from the text."""
+    think_block = re.compile(r"<think>.*?</think>", re.DOTALL)
+    return think_block.sub("", response_text)
+def summarize_chat(model):
+    """Ask ``model`` for a short summary of the conversation stored in ``chat_history``.
+
+    Every (question, answer) pair is rendered into one prompt; the content
+    of the model's reply is returned.
+    """
+    turns = []
+    for q, a in chat_history:
+        turns.append(f"پرسش: {q}\nپاسخ: {a}")
+    chat_text = "\n".join(turns)
+    summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
+    return model.invoke(summary_prompt).content
+def answer_query(query, file_path, summarize, tone, model_name, creativity, keywords, language, response_length, welcome_message, exclusion_words):
+    """Answer ``query`` with the selected Groq model, using retrieved file chunks as context.
+
+    Parameters:
+        query: the user's question.
+        file_path: path of an uploaded file to index first, or a falsy value.
+        summarize: when truthy, also return a summary of the conversation.
+        tone: key into the tone instructions; unknown keys fall back to a
+            plain "answer in <language>" instruction.
+        model_name: Groq model identifier used for this request.
+        creativity: value passed to the model as ``temperature``.
+        keywords: optional text prepended to the query for the similarity search.
+        language: language the answer must be written in.
+        response_length: "کوتاه" (short) or "بلند" (long); anything else adds no instruction.
+        welcome_message: optional text prepended to the prompt on the first turn only.
+        exclusion_words: optional words the model is told not to use.
+
+    Returns a 4-tuple ``(answer, summary, total_tokens, price)``. Any exception
+    is caught and reported in the first element, with empty/zero placeholders.
+    """
     global chat_history
     try:
+        # Built inside the try so a client/configuration error is reported in the
+        # UI like any other failure. The key comes from the environment rather
+        # than being hard-coded in the source.
+        model = ChatGroq(api_key=os.environ.get("GROQ_API_KEY"), model_name=model_name)
+        if file_path:
+            process_file(file_path)
+        search_query = f"{keywords} {query}" if keywords else query
+        retrieved_docs = vector_store.similarity_search(search_query, k=3)
         knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
         tone_prompts = {
             "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
+            "محاوره‌ای": "پاسخ را به صورت دوستانه ارائه کن.",
+            "علمی": "پاسخ را با استدلال‌های منطقی ارائه کن.",
+            "طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
         }
+        tone_instruction = tone_prompts.get(tone, (f"پاسخ را به زبان {language} ارائه کن."))
+        language_instruction = f"پاسخ را فقط به زبان {language} ارائه کن و از زبان دیگری استفاده نکن مگر آنکه بخواهی کد بنویسی که در آن صورت فقط از زبان انگلیسی استفاده کن مگر اینکه کاربر از تو درخواست کند از زبان دیگری استفاده بکنی و از زبان چینی استفاده نکن." if language else ""
+        if response_length == "کوتاه":
+            length_instruction = "پاسخ را به صورت مختصر ارائه کن."
+        elif response_length == "بلند":
+            length_instruction = "پاسخ را به صورت مفصل و جامع ارائه کن."
+        else:
+            length_instruction = ""
+        exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""
         prompt = (
+            f"شما ParvizGPT هستید، یک دستیار هوش مصنوعی ساخته شده توسط امیرمهدی پرویز دانشجو دانشگاه صنعتی کرمانشاه "
+            f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
         )
+        # The welcome message is only used while the conversation is still empty.
+        if welcome_message and not chat_history:
+            prompt = f"{welcome_message}\n\n" + prompt
+        # Earlier turns are placed ahead of the instructions so the model sees the context.
+        if chat_history:
+            conversation_history = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history])
+            prompt = f"{conversation_history}\n\n" + prompt
+        prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
+        response = model.invoke(prompt, temperature=creativity)
+        cleaned_response = remove_think_sections(response.content)
+        chat_history.append((query, cleaned_response))
+        total_tokens, price = calculate_price(prompt, cleaned_response)
+        summary = summarize_chat(model) if summarize else "خلاصه‌سازی غیرفعال است."
+        return cleaned_response, summary, total_tokens, price
     except Exception as e:
+        # Same currency unit as calculate_price, so the price field stays consistent.
+        return f"خطا: {str(e)}", "", 0, "0 هزار تومان"
+def chat_with_bot(query, file, summarize, tone, model_name, creativity, keywords, language, response_length, welcome_message, exclusion_words):
+    """Gradio callback: resolve the uploaded file to a path and delegate to ``answer_query``.
+
+    ``file`` may be None (nothing uploaded), an object exposing the path as
+    ``.name``, or a plain path string; the remaining arguments are forwarded
+    unchanged. Returns whatever ``answer_query`` returns.
+    """
+    # getattr() keeps working when the upload arrives as a bare path string.
+    file_path = getattr(file, "name", file) if file else None
+    return answer_query(query, file_path, summarize, tone, model_name, creativity, keywords, language, response_length, welcome_message, exclusion_words)
+def clear_memory():
+    """Forget the conversation and return blank values for the four output widgets.
+
+    Only ``chat_history`` is reset; the vector store is not touched here.
+    """
+    global chat_history
+    chat_history = []
+    cleared_outputs = ("", "", 0, 0)
+    return cleared_outputs
 with gr.Blocks() as demo:
+    # --- Header ---
+    gr.Markdown("## 🤖 Parviz GPT - چت بات هوش مصنوعی")
     gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")
+    # --- Conversation widgets ---
+    chat_output = gr.Textbox(label="📝 پاسخ", interactive=False, lines=10)
+    query_input = gr.Textbox(label="❓ سوال خود را وارد کنید")
+    submit_button = gr.Button("🚀 ارسال")
+    del_button = gr.Button("پاک کردن حافظه")
+    summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)
+    token_count = gr.Textbox(label="🔢 تعداد توکن‌ها", interactive=False)
+    token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
+    file_input = gr.File(label="📂 آپلود فایل", file_types=[".pdf", ".txt"])
+    # --- Generation settings ---
     with gr.Row():
+        model_dropdown = gr.Dropdown(label="🔍 انتخاب مدل", choices=models, value=default_model)
+        tone_dropdown = gr.Dropdown(label="🎭 لحن پاسخ", choices=["رسمی", "محاوره‌ای", "علمی", "طنزآمیز"], value="رسمی")
+        language_dropdown = gr.Dropdown(label="🌐 زبان چت بات", choices=["فارسی", "انگلیسی", "عربی"], value="فارسی")
     with gr.Row():
+        creativity_slider = gr.Slider(label="🎨 خلاقیت (Temperature)", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
+        response_length_dropdown = gr.Dropdown(label="📏 طول پاسخ", choices=["کوتاه", "بلند"], value="بلند")
+    keywords_input = gr.Textbox(label="🔑 کلمات کلیدی (اختیاری)")
+    welcome_message_input = gr.Textbox(label="👋 پیام خوش آمدگویی (اختیاری)")
+    exclusion_words_input = gr.Textbox(label="🚫 کلمات استثنا (اختیاری)")
+    summarize_checkbox = gr.Checkbox(label="📌 خلاصه‌ساز را فعال کن")
+    # --- Event wiring ---
+    # Input order must match the parameter order of chat_with_bot.
+    chat_inputs = [
+        query_input,
+        file_input,
+        summarize_checkbox,
+        tone_dropdown,
+        model_dropdown,
+        creativity_slider,
+        keywords_input,
+        language_dropdown,
+        response_length_dropdown,
+        welcome_message_input,
+        exclusion_words_input,
+    ]
+    chat_outputs = [chat_output, summary_output, token_count, token_price]
+    del_button.click(clear_memory, inputs=[], outputs=chat_outputs)
+    # Pressing Enter in the question box and clicking the button run the same handler.
+    query_input.submit(fn=chat_with_bot, inputs=chat_inputs, outputs=chat_outputs)
+    submit_button.click(chat_with_bot, inputs=chat_inputs, outputs=chat_outputs)
 demo.launch()