Spaces:

GIGAParviz
/

Parviz_Mind

Running

File size: 6,705 Bytes

fd21fa2
b7764cf
 
 
 
 
 
 
 
fc39101
b7764cf
 
 
bd29fc5
b7764cf
a94ff47
b7764cf
bd29fc5
b7764cf
 
 
bd29fc5
b7764cf
 
 
 
 
 
 
bd29fc5
b7764cf
 
 
 
 
 
bd29fc5
fc39101
b7764cf
 
 
 
 
 
 
 
 
fc39101
b7764cf
 
 
 
 
 
 
 
fc39101
b7764cf
 
 
fd21fa2
b7764cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd21fa2
 
b7764cf
 
fc39101
b7764cf
fc39101
b7764cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc39101
b7764cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc39101
b7764cf
 
 
fc39101
b7764cf

import os
import re
import gradio as gr
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Shared, process-wide retrieval components, built once at import time.
embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
vector_store = InMemoryVectorStore(embeddings)
# Uploaded text is cut into 1000-character chunks with 200 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# SECURITY: the Groq API key must never be committed to source control. It is
# read from the GROQ_API_KEY environment variable (e.g. a deployment secret).
# Any key that was previously hard-coded here should be treated as leaked and
# revoked. NOTE(review): with no key configured the Groq client is expected to
# fail at construction time - confirm against the installed langchain_groq.
model = ChatGroq(
    api_key=os.environ.get("GROQ_API_KEY"),
    model_name="deepseek-r1-distill-llama-70b",
)

# (question, cleaned answer) pairs appended by answer_query; module-level, so
# it is shared by every caller of this process.
chat_history = []

# Flat per-token rate used by calculate_price for the displayed cost estimate.
PRICE_PER_TOKEN = 0.00001

def count_tokens(text):
    """Estimate the token count of *text* as its number of whitespace-separated words."""
    return sum(1 for _ in text.split())

def calculate_price(input_text, output_text):
    """Estimate the cost of one exchange from its word-based token counts.

    Returns:
        A ``(total_tokens, price)`` tuple: the summed token estimate of both
        texts, and the price formatted with six decimals followed by the
        Persian word for "dollar".
    """
    token_total = sum(count_tokens(part) for part in (input_text, output_text))
    cost = token_total * PRICE_PER_TOKEN
    return token_total, f"{cost:.6f} دلار"

def process_file(file_path):
    """Read an uploaded file and return its text content.

    Args:
        file_path: Path to a ``.pdf`` or ``.txt`` file (the extension is
            matched case-insensitively). A falsy value means "no file".

    Returns:
        The extracted text, or ``None`` when *file_path* is falsy. For PDFs
        the text of all pages is joined with newlines.

    Raises:
        RuntimeError: If the extension is unsupported or reading/parsing
            fails. The original exception is attached as ``__cause__``.
    """
    if not file_path:
        return None

    file_extension = os.path.splitext(file_path)[1].lower()

    try:
        if file_extension == ".pdf":
            # Imported lazily so the PDF dependency is only needed for PDFs.
            from pypdf import PdfReader

            reader = PdfReader(file_path)
            # Guard against a page yielding no text (None) so join() cannot
            # fail with a TypeError.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        if file_extension == ".txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
        raise ValueError(f"فرمت فایل پشتیبانی نمی‌شود: {file_extension}")
    except Exception as e:
        # Callers only catch one error type; chain the cause explicitly so the
        # underlying failure stays visible in tracebacks.
        raise RuntimeError(f"خطا در پردازش فایل: {str(e)}") from e


def remove_think_sections(response_text):
    """Strip every ``<think>...</think>`` span from the model output.

    Matching is non-greedy and spans newlines, so each closed section is
    removed independently; text outside the sections is returned untouched.
    """
    think_block = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
    return think_block.sub("", response_text)

# Identity keys of uploads that have already been embedded into vector_store.
_indexed_files = set()


def _index_file_once(file_path):
    """Split and embed the file at *file_path* into the vector store, at most once."""
    try:
        file_stat = os.stat(file_path)
        file_key = (file_path, file_stat.st_size, file_stat.st_mtime_ns)
    except OSError:
        # Unreadable path: fall through so process_file raises its usual error.
        file_key = (file_path, None, None)

    if file_key in _indexed_files:
        return

    file_content = process_file(file_path)
    if file_content:
        file_docs = [Document(page_content=file_content, metadata={"source": "uploaded_file"})]
        file_splits = text_splitter.split_documents(file_docs)
        vector_store.add_documents(file_splits)
    _indexed_files.add(file_key)


def answer_query(query, file_path, summarize, tone):
    """Answer a user question using retrieved context, a tone, and a cost estimate.

    Args:
        query: The user's question.
        file_path: Optional path of an uploaded file to add to the vector
            store before retrieval; falsy means no file.
        summarize: When truthy, also produce a summary of the conversation.
        tone: Key selecting the tone instruction; unknown values fall back to
            a generic "answer in Persian" instruction.

    Returns:
        A ``(response, summary, total_tokens, price)`` tuple. On any failure
        the response is an error message and the remaining values are
        ``""``, ``0`` and a zero-price string.
    """
    try:
        if file_path:
            # The same upload stays attached across successive questions.
            # Indexing it on every call would fill the store with duplicate
            # chunks, and the k=2 search below would return copies of one
            # passage, so each upload is embedded only once.
            _index_file_once(file_path)

        retrieved_docs = vector_store.similarity_search(query, k=2)
        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)

        tone_prompts = {
            "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
            "محاوره‌ای": "پاسخ را به صورت دوستانه و غیررسمی ارائه کن.",
            "علمی": "پاسخ را با ذکر منابع علمی و استدلال‌های منطقی ارائه کن.",
            "طنزآمیز": "پاسخ را با لحنی طنزآمیز و سرگرم‌کننده ارائه کن.",
        }
        tone_instruction = tone_prompts.get(tone, "پاسخ را به زبان فارسی ارائه کن.")

        prompt = (
            f"شما ParvizGPT هستید، یک دستیار هوش مصنوعی که توسط امیر مهدی پرویز ساخته شده است. "
            f"همیشه به فارسی پاسخ دهید. {tone_instruction} "
            f"\n\nاطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
        )

        response = model.invoke(prompt)
        # Drop the model's <think> reasoning before showing or storing it.
        cleaned_response = remove_think_sections(response.content)

        # Appending mutates the module-level list in place; no `global` needed.
        chat_history.append((query, cleaned_response))

        total_tokens, price = calculate_price(prompt, cleaned_response)

        summary = summarize_chat() if summarize else "خلاصه‌سازی غیرفعال است."

        return cleaned_response, summary, total_tokens, price

    except Exception as e:
        # UI boundary: report the failure in the response slot instead of
        # letting the exception escape to the caller.
        return f"خطا: {str(e)}", "", 0, "0 دلار"

def summarize_chat():
    """Ask the model for a short summary of the recorded conversation.

    Every (question, answer) pair in ``chat_history`` is rendered into a
    transcript, which is sent to the model with a summarisation instruction.

    Returns:
        The content of the model's reply.
    """
    turns = []
    for question, answer in chat_history:
        turns.append(f"پرسش: {question}\nپاسخ: {answer}")
    transcript = "\n".join(turns)

    summary_prompt = (
        "یک خلاصه کوتاه و دقیق از مکالمه زیر ارائه کن:\n\n"
        + transcript
        + "\n\nخلاصه:"
    )
    return model.invoke(summary_prompt).content

def chat_with_bot(query, file, summarize, tone):
    """Gradio callback: resolve the upload's path and delegate to answer_query.

    Returns:
        The ``(response, summary, total_tokens, price)`` values produced by
        ``answer_query``.
    """
    if file:
        uploaded_path = file.name
    else:
        uploaded_path = None
    return answer_query(query, uploaded_path, summarize, tone)

# Gradio UI: answer and summary boxes, the question box with its controls,
# token/cost readouts, and the file upload, created in this order.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Parviz GPT")
    gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")

    with gr.Column():
        chat_output = gr.Textbox(label="📝 تاریخچه چت", interactive=False, lines=10)
        summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)

        query_input = gr.Textbox(
            label="❓ سوال خود را وارد کنید",
            placeholder="مثلاً: کی تو را ساخته است؟",
        )

        with gr.Row():
            summarize_checkbox = gr.Checkbox(label="📌 خلاصه‌ساز را فعال کن")
            submit_button = gr.Button("🚀 ارسال")
            tone_dropdown = gr.Dropdown(
                label="🎭 انتخاب لحن پاسخ",
                choices=["رسمی", "محاوره‌ای", "علمی", "طنزآمیز"],
                value="رسمی",
            )

    with gr.Row():
        token_count = gr.Textbox(label="🔢 تعداد توکن‌ها", interactive=False)
        token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)

    with gr.Row():
        file_input = gr.File(label="📂 فایل خود را آپلود کنید", file_types=[".pdf", ".txt"])

    # Pressing Enter in the question box and clicking the button run the same
    # callback with identical inputs and outputs.
    callback_inputs = [query_input, file_input, summarize_checkbox, tone_dropdown]
    callback_outputs = [chat_output, summary_output, token_count, token_price]
    for register_event in (query_input.submit, submit_button.click):
        register_event(
            fn=chat_with_bot,
            inputs=callback_inputs,
            outputs=callback_outputs,
        )

demo.launch()