Spaces:

GIGAParviz
/

Parviz_Mind

Sleeping

App Files Files

Parviz_Mind / app.py

GIGAParviz

Update app.py

b7764cf verified 5 months ago

raw

history blame

6.71 kB

	import os
	import re
	import gradio as gr
	from langchain_groq import ChatGroq
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_core.vectorstores import InMemoryVectorStore
	from langchain_core.documents import Document
	from langchain_text_splitters import RecursiveCharacterTextSplitter

	# Embedding model used to vectorise both document chunks and user queries.
	embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
	# In-memory vector store built on the embedding model above.
	vector_store = InMemoryVectorStore(embeddings)
	# Splitter applied to uploaded documents before they are indexed.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

	# SECURITY: the Groq API key must never be committed to source control.
	# It is read from the environment (on Hugging Face Spaces: a repository
	# secret named GROQ_API_KEY). Any key that was previously hard-coded here is
	# public and has to be revoked and rotated.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	if not GROQ_API_KEY:
	    raise RuntimeError(
	        "The GROQ_API_KEY environment variable is not set; "
	        "configure it before starting the app."
	    )
	model = ChatGroq(api_key=GROQ_API_KEY, model_name="deepseek-r1-distill-llama-70b")

	# (query, answer) pairs appended by answer_query. This is module-level state,
	# so it is shared by every caller in the same process.
	chat_history = []

	# Flat price per estimated token, used by calculate_price.
	PRICE_PER_TOKEN = 0.00001

	def count_tokens(text):
	    """Estimate the number of tokens in ``text``.

	    The estimate is simply the number of whitespace-separated words.
	    """
	    words = text.split()
	    return len(words)

	def calculate_price(input_text, output_text):
	    """Estimate the cost of one exchange from its token counts.

	    Args:
	        input_text: Text sent to the model.
	        output_text: Text returned by the model.

	    Returns:
	        A ``(total_tokens, price)`` tuple: the combined token estimate of
	        both texts and the price as a string with six decimal places.
	    """
	    token_total = sum(count_tokens(part) for part in (input_text, output_text))
	    cost = token_total * PRICE_PER_TOKEN
	    return token_total, f"{cost:.6f} دلار"

	def process_file(file_path):
	    """Read an uploaded file and return its text content.

	    Args:
	        file_path: Path to a ``.pdf`` or ``.txt`` file. A falsy value
	            (``None`` or ``""``) means no file was supplied.

	    Returns:
	        The extracted text, or ``None`` when ``file_path`` is falsy.

	    Raises:
	        RuntimeError: If the extension is unsupported or the file cannot be
	            read or parsed. The underlying exception is attached as
	            ``__cause__``.
	    """
	    if not file_path:
	        return None

	    file_extension = os.path.splitext(file_path)[1].lower()

	    try:
	        if file_extension == ".pdf":
	            # Imported lazily so pypdf is only needed when a PDF is processed.
	            from pypdf import PdfReader

	            reader = PdfReader(file_path)
	            # ``or ""`` keeps the join safe if a page yields no text.
	            return "\n".join(page.extract_text() or "" for page in reader.pages)
	        if file_extension == ".txt":
	            # utf-8-sig reads plain UTF-8 and also drops a leading BOM, which
	            # would otherwise end up inside the indexed text.
	            with open(file_path, "r", encoding="utf-8-sig") as f:
	                return f.read()
	        raise ValueError(f"فرمت فایل پشتیبانی نمی‌شود: {file_extension}")
	    except Exception as e:
	        # Callers get one exception type; chaining preserves the real cause.
	        raise RuntimeError(f"خطا در پردازش فایل: {str(e)}") from e


	def remove_think_sections(response_text):
	    """Remove ``<think>...</think>`` sections from a model response.

	    Args:
	        response_text: Raw text returned by the model.

	    Returns:
	        The text with every ``<think>`` section removed and surrounding
	        whitespace stripped. An opening ``<think>`` with no closing tag
	        (for example when the output was cut off) is removed through the
	        end of the text.
	    """
	    # Closed sections: non-greedy so each pair is removed on its own;
	    # DOTALL so a section may span several lines.
	    cleaned_text = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL)
	    # Whatever follows a still-open tag is unfinished reasoning, not an answer.
	    cleaned_text = re.sub(r"<think>.*", "", cleaned_text, flags=re.DOTALL)
	    # Drop the blank lines left behind where a section used to be.
	    return cleaned_text.strip()

	# SHA-256 digests of file contents that were already added to the vector
	# store, so asking several questions about one upload indexes it only once.
	_indexed_file_digests = set()


	def _index_file_once(file_content):
	    """Chunk and index ``file_content`` unless identical content was indexed before."""
	    import hashlib

	    digest = hashlib.sha256(
	        file_content.encode("utf-8", errors="surrogatepass")
	    ).hexdigest()
	    if digest in _indexed_file_digests:
	        return
	    file_docs = [Document(page_content=file_content, metadata={"source": "uploaded_file"})]
	    file_splits = text_splitter.split_documents(file_docs)
	    vector_store.add_documents(file_splits)
	    # Recorded only after a successful add so a failed attempt can be retried.
	    _indexed_file_digests.add(digest)


	def answer_query(query, file_path, summarize, tone):
	    """Answer a user question with the selected tone and estimate its token cost.

	    The uploaded file, if any, is indexed into the vector store; the two
	    chunks most similar to the query are placed in the prompt as context.

	    Args:
	        query: The user's question.
	        file_path: Path of an uploaded ``.pdf``/``.txt`` file, or a falsy value.
	        summarize: When truthy, also produce a summary of the conversation.
	        tone: Key selecting the tone instruction; unknown values fall back to
	            a default instruction.

	    Returns:
	        A ``(response, summary, total_tokens, price)`` tuple. If anything
	        fails, the response slot carries the error message and the other
	        slots hold ``""``, ``0`` and a zero price string.
	    """
	    try:
	        file_content = process_file(file_path) if file_path else None
	        if file_content:
	            # Previously the same file was re-added on every question, so
	            # duplicate chunks crowded out other results in the k=2 search.
	            _index_file_once(file_content)

	        retrieved_docs = vector_store.similarity_search(query, k=2)
	        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)

	        tone_prompts = {
	            "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
	            "محاوره‌ای": "پاسخ را به صورت دوستانه و غیررسمی ارائه کن.",
	            "علمی": "پاسخ را با ذکر منابع علمی و استدلال‌های منطقی ارائه کن.",
	            "طنزآمیز": "پاسخ را با لحنی طنزآمیز و سرگرم‌کننده ارائه کن.",
	        }
	        tone_instruction = tone_prompts.get(tone, "پاسخ را به زبان فارسی ارائه کن.")

	        prompt = (
	            f"شما ParvizGPT هستید، یک دستیار هوش مصنوعی که توسط امیر مهدی پرویز ساخته شده است. "
	            f"همیشه به فارسی پاسخ دهید. {tone_instruction} "
	            f"\n\nاطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
	        )

	        response = model.invoke(prompt)
	        cleaned_response = remove_think_sections(response.content)

	        # chat_history is only mutated, never rebound, so no ``global`` is needed.
	        chat_history.append((query, cleaned_response))

	        total_tokens, price = calculate_price(prompt, cleaned_response)

	        summary = summarize_chat() if summarize else "خلاصه‌سازی غیرفعال است."

	        return cleaned_response, summary, total_tokens, price

	    except Exception as e:
	        # UI boundary: report the failure in the response slot instead of raising.
	        return f"خطا: {str(e)}", "", 0, "0 دلار"

	def summarize_chat():
	    """Summarise the recorded conversation with the chat model.

	    Every ``(query, answer)`` pair in ``chat_history`` is rendered into one
	    prompt and sent to the model.

	    Returns:
	        The model's summary text with any ``<think>`` sections removed.
	    """
	    chat_text = "\n".join(f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history)
	    summary_prompt = f"یک خلاصه کوتاه و دقیق از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
	    summary_response = model.invoke(summary_prompt)
	    # Same clean-up that answer_query applies to answers; without it the
	    # model's reasoning text would be shown as part of the summary.
	    return remove_think_sections(summary_response.content)

	def chat_with_bot(query, file, summarize, tone):
	    """Gradio callback: resolve the uploaded file's path and answer the query.

	    Args:
	        query: The user's question.
	        file: The upload as passed by the file component, or ``None``. Either
	            an object exposing the path as ``.name`` or a plain path string.
	        summarize: Whether a conversation summary is requested.
	        tone: Selected response tone.

	    Returns:
	        The ``(response, summary, total_tokens, price)`` tuple produced by
	        ``answer_query``.
	    """
	    # Use ``.name`` when present; otherwise the value itself is the path.
	    file_path = getattr(file, "name", file) if file else None
	    return answer_query(query, file_path, summarize, tone)

	# NOTE(review): the nesting of the layout containers below was reconstructed
	# from statement order; confirm it against the intended page layout.
	with gr.Blocks() as demo:
	    gr.Markdown("## 🤖 Parviz GPT")
	    gr.Markdown("یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.")

	    with gr.Column():
	        # Read-only result boxes.
	        chat_output = gr.Textbox(label="📝 تاریخچه چت", interactive=False, lines=10)
	        summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)

	        query_input = gr.Textbox(label="❓ سوال خود را وارد کنید", placeholder="مثلاً: کی تو را ساخته است؟")

	        with gr.Row():
	            summarize_checkbox = gr.Checkbox(label="📌 خلاصه‌ساز را فعال کن")
	            submit_button = gr.Button("🚀 ارسال")
	            tone_dropdown = gr.Dropdown(label="🎭 انتخاب لحن پاسخ", choices=["رسمی", "محاوره‌ای", "علمی", "طنزآمیز"], value="رسمی")

	        with gr.Row():
	            token_count = gr.Textbox(label="🔢 تعداد توکن‌ها", interactive=False)
	            token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)

	        with gr.Row():
	            file_input = gr.File(label="📂 فایل خود را آپلود کنید", file_types=[".pdf", ".txt"])

	    # Pressing Enter in the query box and clicking the button run the same
	    # handler with the same components, registered in the original order.
	    handler_inputs = [query_input, file_input, summarize_checkbox, tone_dropdown]
	    handler_outputs = [chat_output, summary_output, token_count, token_price]
	    for register_event in (query_input.submit, submit_button.click):
	        register_event(fn=chat_with_bot, inputs=handler_inputs, outputs=handler_outputs)

	demo.launch()