Spaces:

GIGAParviz
/

Parviz_Mind

Sleeping

App Files Files

Parviz_Mind / app.py

GIGAParviz

Update app.py

33a37f2 verified 5 months ago

raw

history blame

12.8 kB

	import os
	import re
	import sqlite3
	import threading
	from datetime import datetime

	import gradio as gr
	from langchain.vectorstores import Chroma
	from langchain_core.documents import Document
	from langchain_groq import ChatGroq
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from pypdf import PdfReader

	# Model identifiers offered in the UI dropdown and passed to ChatGroq as
	# `model_name`. The first entry doubles as the default selection.
	models = [
	    "deepseek-r1-distill-llama-70b",
	    "llama-3.3-70b-versatile",
	    "gemma2-9b-it",
	]
	default_model = models[0]

	class DatabaseManager:
	    """SQLite-backed store for chat summaries.

	    One connection is opened at construction time and shared by every
	    method. The connection is opened with ``check_same_thread=False`` so it
	    can be used from threads other than the one that created the object
	    (e.g. web-framework worker threads); a lock serializes access, as the
	    ``sqlite3`` documentation recommends for shared connections.
	    """

	    def __init__(self, db_name="chat_history.db"):
	        """Open (or create) the database file and ensure the schema exists.

	        Args:
	            db_name: Path handed to ``sqlite3.connect``; ``":memory:"`` works
	                for throw-away databases.
	        """
	        # Without check_same_thread=False every call made from another
	        # thread raises sqlite3.ProgrammingError, which save_summary would
	        # silently turn into a `False` return value.
	        self.conn = sqlite3.connect(db_name, check_same_thread=False)
	        self._lock = threading.Lock()
	        self._create_tables()

	    def _create_tables(self):
	        """Create the ``chat_summaries`` table if it is not there yet."""
	        with self._lock:
	            self.conn.execute(
	                '''CREATE TABLE IF NOT EXISTS chat_summaries
	                (id INTEGER PRIMARY KEY AUTOINCREMENT,
	                timestamp DATETIME,
	                summary TEXT,
	                model_used TEXT,
	                token_count INT)'''
	            )
	            self.conn.commit()

	    def save_summary(self, summary_data):
	        """Insert one summary row stamped with the current local time.

	        Args:
	            summary_data: Mapping with the keys ``'summary'``, ``'model'``
	                and ``'tokens'``.

	        Returns:
	            ``True`` when the row was committed, ``False`` when the mapping
	            was malformed or SQLite reported an error (the error is printed).
	        """
	        try:
	            row = (
	                # Explicit ISO string (space separator) instead of relying on
	                # sqlite3's implicit datetime adapter, which is deprecated
	                # since Python 3.12 and produced this same format.
	                datetime.now().isoformat(sep=" "),
	                summary_data['summary'],
	                summary_data['model'],
	                summary_data['tokens'],
	            )
	            with self._lock:
	                try:
	                    self.conn.execute(
	                        '''INSERT INTO chat_summaries
	                        (timestamp, summary, model_used, token_count)
	                        VALUES (?, ?, ?, ?)''',
	                        row,
	                    )
	                    self.conn.commit()
	                except sqlite3.Error:
	                    # Do not leave a half-finished transaction open on the
	                    # shared connection.
	                    self.conn.rollback()
	                    raise
	        except (sqlite3.Error, KeyError, TypeError) as e:
	            print(f"Database error: {str(e)}")
	            return False
	        return True

	    def load_summaries(self, limit=5):
	        """Return the most recent summaries, newest first.

	        Args:
	            limit: Maximum number of rows to read.

	        Returns:
	            The ``summary`` column of the selected rows joined with newlines;
	            an empty string when the table is empty.
	        """
	        with self._lock:
	            rows = self.conn.execute(
	                "SELECT summary FROM chat_summaries ORDER BY id DESC LIMIT ?",
	                (limit,),
	            ).fetchall()
	        return "\n".join(row[0] for row in rows)

	class AICore:
	    """Chat engine combining a Groq chat model, a Chroma vector store and SQLite.

	    The instance keeps the running conversation in ``chat_history`` as a list
	    of ``(question, answer)`` tuples, indexes uploaded PDF/TXT files into the
	    vector store, and stores conversation summaries through
	    ``DatabaseManager``.
	    """

	    # Environment variable that must hold the Groq API key. Secrets must not
	    # live in the source file.
	    API_KEY_ENV_VAR = "GROQ_API_KEY"

	    # UI language labels whose answers are expected in Latin script and must
	    # therefore bypass filter_to_persian ("انگلیسی" is the English option).
	    _UNFILTERED_LANGUAGES = ("انگلیسی",)

	    def __init__(self):
	        """Build the embedding model, vector store, chat model and database."""
	        # Resolve the key first so a missing secret fails before any other
	        # component is constructed.
	        self.api_key = self._load_api_key()
	        self.embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
	        self.vector_store = Chroma(embedding_function=self.embeddings)
	        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	        self.chat_history = []
	        self.price_per_token = 0.00001
	        self.model = ChatGroq(api_key=self.api_key, model_name=default_model)
	        self.db = DatabaseManager()
	        # Paths already added to the vector store, so that a file left in the
	        # upload widget is not re-indexed on every question.
	        self._indexed_files = set()

	    @classmethod
	    def _load_api_key(cls):
	        """Return the Groq API key from the environment.

	        Raises:
	            RuntimeError: If the variable named by ``API_KEY_ENV_VAR`` is
	                unset or blank.
	        """
	        # NOTE: the key that used to be hard-coded here was published with
	        # the file and should be revoked.
	        api_key = os.environ.get(cls.API_KEY_ENV_VAR, "").strip()
	        if not api_key:
	            raise RuntimeError(
	                f"Environment variable {cls.API_KEY_ENV_VAR} is not set; "
	                "it must contain the Groq API key."
	            )
	        return api_key

	    def _init_model(self, model_name):
	        """Swap the chat model when ``model_name`` differs from the current one."""
	        if self.model.model_name != model_name:
	            self.model = ChatGroq(api_key=self.api_key, model_name=model_name)

	    def _format_history(self):
	        """Render ``chat_history`` as alternating question/answer lines."""
	        return "\n".join(f"پرسش: {q}\nپاسخ: {a}" for q, a in self.chat_history)

	    def summarize_chat(self):
	        """Ask the current model for a short summary of the conversation.

	        Returns:
	            The ``content`` of the model response.
	        """
	        chat_text = self._format_history()
	        summary_prompt = f"یک خلاصه کوتاه از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
	        summary_response = self.model.invoke(summary_prompt)
	        return summary_response.content

	    @staticmethod
	    def _file_path(file_obj):
	        """Return the filesystem path for an upload (object with ``.name`` or a path)."""
	        return file_obj.name if hasattr(file_obj, "name") else file_obj

	    def process_file(self, file_obj):
	        """Extract the text of a PDF/TXT upload and index it in the vector store.

	        Args:
	            file_obj: Either an object exposing the path as ``.name`` or the
	                path itself. Falsy values are ignored.

	        Returns:
	            The extracted text, or ``None`` when ``file_obj`` is falsy.

	        Raises:
	            RuntimeError: On any failure, including an unsupported extension;
	                the original exception is attached as ``__cause__``.
	        """
	        if not file_obj:
	            return None
	        file_path = self._file_path(file_obj)
	        file_extension = os.path.splitext(file_path)[1].lower()
	        try:
	            if file_extension == ".pdf":
	                reader = PdfReader(file_path)
	                # Guard against a page yielding no text object at all.
	                file_text = "\n".join((page.extract_text() or "") for page in reader.pages)
	            elif file_extension == ".txt":
	                with open(file_path, "r", encoding="utf-8") as f:
	                    file_text = f.read()
	            else:
	                raise ValueError(f"Unsupported file format: {file_extension}")
	            file_docs = [Document(page_content=file_text, metadata={"source": "uploaded_file"})]
	            file_splits = self.text_splitter.split_documents(file_docs)
	            # An empty document produces no chunks; there is nothing to add.
	            if file_splits:
	                self.vector_store.add_documents(file_splits)
	            return file_text
	        except Exception as e:
	            raise RuntimeError(f"Error processing file: {str(e)}") from e

	    def count_tokens(self, text):
	        """Approximate a token count as the number of whitespace-separated words."""
	        return len(text.split())

	    def calculate_price(self, input_text, output_text):
	        """Estimate the cost of one exchange.

	        Returns:
	            ``(total_tokens, price)`` where ``price`` is a string with six
	            decimals followed by the Persian word for "dollar".
	        """
	        input_tokens = self.count_tokens(input_text)
	        output_tokens = self.count_tokens(output_text)
	        total_tokens = input_tokens + output_tokens
	        total_price = total_tokens * self.price_per_token
	        return total_tokens, f"{total_price:.6f} دلار"

	    def remove_think_sections(self, response_text):
	        """Drop every ``<think>...</think>`` span, including multi-line ones."""
	        return re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL)

	    def filter_to_persian(self, text):
	        """Keep only Arabic-block characters, whitespace, ASCII digits and basic punctuation."""
	        return re.sub(r'[^\u0600-\u06FF\s\.,؛؟!٪،0-9]', '', text)

	    def _postprocess_response(self, raw_text, language):
	        """Clean a raw model answer for display.

	        ``<think>`` sections are always removed. The Persian-script filter is
	        applied for every language except those in ``_UNFILTERED_LANGUAGES``;
	        applying it to an English answer would delete every Latin letter.
	        """
	        text = self.remove_think_sections(raw_text)
	        if language not in self._UNFILTERED_LANGUAGES:
	            text = self.filter_to_persian(text)
	        return text

	    def answer_query(self, query, file_obj, summarize, tone, model_name, creativity,
	                     keywords, language, response_length, welcome_message, exclusion_words):
	        """Answer ``query`` using retrieved context and the conversation so far.

	        Args:
	            query: The user's question.
	            file_obj: Optional upload to index before answering.
	            summarize: When truthy, also summarize the chat and store it.
	            tone: Tone label; unknown labels fall back to a language hint.
	            model_name: Groq model to use for this call.
	            creativity: Value passed to the model as ``temperature``.
	            keywords: Optional text prepended to the retrieval query.
	            language: Requested answer language (UI label).
	            response_length: Short/long label controlling the length hint.
	            welcome_message: Optional text placed before the prompt on the
	                first turn only.
	            exclusion_words: Optional words the model is told to avoid.

	        Returns:
	            ``(answer, summary_text, total_tokens, price)``.

	        Raises:
	            RuntimeError: If the uploaded file cannot be processed.
	        """
	        self._init_model(model_name)
	        if file_obj:
	            file_path = self._file_path(file_obj)
	            # Index each upload once; the widget keeps returning the same
	            # file on every question and re-adding it would fill the store
	            # with duplicate chunks.
	            if file_path not in self._indexed_files:
	                self.process_file(file_obj)
	                self._indexed_files.add(file_path)
	        search_query = f"{keywords} {query}" if keywords else query
	        retrieved_docs = self.vector_store.similarity_search(search_query, k=3)
	        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
	        tone_prompts = {
	            "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
	            "محاوره‌ای": "پاسخ را به صورت دوستانه ارائه کن.",
	            "علمی": "پاسخ را با استدلال‌های منطقی ارائه کن.",
	            "طنزآمیز": "پاسخ را با لحنی طنزآمیز ارائه کن.",
	        }
	        tone_instruction = tone_prompts.get(tone, f"پاسخ را به زبان {language} ارائه کن.")
	        language_instruction = (f"پاسخ را فقط به زبان {language} ارائه کن و از زبان دیگری استفاده نکن مگر آنکه بخواهی کد بنویسی "
	                                f"که در آن صورت فقط از زبان انگلیسی استفاده کن مگر اینکه کاربر از تو درخواست کند از زبان دیگری استفاده بکنی و از زبان چینی استفاده نکن.") if language else ""
	        if response_length == "کوتاه":
	            length_instruction = "پاسخ را به صورت مختصر ارائه کن."
	        elif response_length == "بلند":
	            length_instruction = "پاسخ را به صورت مفصل و جامع ارائه کن."
	        else:
	            length_instruction = ""
	        exclusion_instruction = f"از کلمات زیر در پاسخ استفاده نکن: {exclusion_words}" if exclusion_words else ""
	        prompt = (
	            f"شما Parviz Mind هستید، یک دستیار هوش مصنوعی ساخته شده توسط امیرمهدی پرویز دانشجو دانشگاه صنعتی کرمانشاه "
	            f"{tone_instruction} {language_instruction} {length_instruction} {exclusion_instruction}\n\n"
	        )
	        if welcome_message and not self.chat_history:
	            prompt = f"{welcome_message}\n\n" + prompt
	        if self.chat_history:
	            prompt = f"{self._format_history()}\n\n" + prompt
	        prompt += f"اطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
	        response = self.model.invoke(prompt, temperature=creativity)
	        cleaned_response = self._postprocess_response(response.content, language)
	        self.chat_history.append((query, cleaned_response))
	        total_tokens, price = self.calculate_price(prompt, cleaned_response)
	        if summarize:
	            summary_text = self.summarize_chat()
	            # Keep the original rule: a summary equal to the "disabled"
	            # placeholder text is not stored.
	            if summary_text != "خلاصه‌سازی غیرفعال است.":
	                self.db.save_summary({
	                    'summary': summary_text,
	                    'model': model_name,
	                    'tokens': total_tokens
	                })
	        else:
	            summary_text = "خلاصه‌سازی غیرفعال است."
	        return cleaned_response, summary_text, total_tokens, price

	    def clear_history(self):
	        """Forget the conversation and return the new, empty history list."""
	        self.chat_history = []
	        return self.chat_history

	class ChatInterface:
	    """Gradio front end that forwards user input to an ``AICore`` instance."""

	    def __init__(self, ai_core: "AICore"):
	        """Store the engine and build the Gradio layout.

	        Args:
	            ai_core: Engine providing ``answer_query``, ``clear_history`` and
	                the ``chat_history`` list shown in the chatbot widget.
	        """
	        self.ai = ai_core
	        self._create_interface()

	    def _create_interface(self):
	        """Create all widgets and wire the button/textbox events.

	        NOTE(review): the row/accordion nesting below was reconstructed from
	        a source whose indentation had been flattened — confirm the layout.
	        """
	        with gr.Blocks() as self.interface:
	            gr.Markdown("## 🤖 Parviz Mind")
	            gr.Markdown("یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.")
	            self.chatbot = gr.Chatbot(label="💬 تاریخچه چت")
	            self.query_input = gr.Textbox(label="❓ سوال خود را وارد کنید")
	            self.summarize_checkbox = gr.Checkbox(label="📌 خلاصه‌ساز را فعال کن")
	            self.submit_button = gr.Button("🚀 ارسال")
	            self.del_button = gr.Button("🗑 پاک کردن حافظه")
	            self.file_input = gr.File(label="📂 آپلود فایل", file_types=[".pdf", ".txt"])
	            with gr.Accordion("خلاصه چت", open=False):
	                with gr.Row():
	                    self.summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)
	            with gr.Accordion("تنظیمات پیشرفته", open=False):
	                with gr.Row():
	                    self.model_dropdown = gr.Dropdown(label="🔍 انتخاب مدل", choices=models, value=default_model)
	                    self.tone_dropdown = gr.Dropdown(label="🎭 لحن پاسخ", choices=["رسمی", "محاوره‌ای", "علمی", "طنزآمیز"], value="رسمی")
	                    self.language_dropdown = gr.Dropdown(label="🌐 زبان چت بات", choices=["فارسی", "انگلیسی", "عربی"], value="فارسی")
	                    self.token_count = gr.Textbox(label="🔢 تعداد توکن‌ها", interactive=False)
	                    self.token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
	                with gr.Row():
	                    self.creativity_slider = gr.Slider(label="🎨 خلاقیت (Temperature)", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
	                    self.response_length_dropdown = gr.Dropdown(label="📏 طول پاسخ", choices=["کوتاه", "بلند"], value="بلند")
	                    self.keywords_input = gr.Textbox(label="🔑 کلمات کلیدی (اختیاری)")
	                    self.welcome_message_input = gr.Textbox(label="👋 پیام خوش آمدگویی (اختیاری)")
	                    self.exclusion_words_input = gr.Textbox(label="🚫 کلمات استثنا (اختیاری)")

	            # Both "send" triggers (button click and Enter in the textbox)
	            # share one input/output wiring, so they cannot drift apart.
	            # Order must match the parameters of process_chat.
	            chat_inputs = [
	                self.query_input, self.file_input, self.summarize_checkbox,
	                self.tone_dropdown, self.model_dropdown, self.creativity_slider,
	                self.keywords_input, self.language_dropdown, self.response_length_dropdown,
	                self.welcome_message_input, self.exclusion_words_input
	            ]
	            chat_outputs = [self.chatbot, self.summary_output, self.token_count, self.token_price]

	            self.del_button.click(self.clear_chat, inputs=[], outputs=chat_outputs)
	            self.submit_button.click(self.process_chat, inputs=chat_inputs, outputs=chat_outputs)
	            self.query_input.submit(self.process_chat, inputs=chat_inputs, outputs=chat_outputs)

	    def process_chat(self, query, file_obj, summarize, tone, model_name, creativity,
	                     keywords, language, response_length, welcome_message, exclusion_words):
	        """Handle one submitted question.

	        Returns:
	            Values for the chatbot, summary, token-count and price widgets,
	            in that order.

	        Raises:
	            gr.Error: When the engine reports a ``RuntimeError`` (raised by
	                file processing), so the message is shown in the UI.
	        """
	        if not query or not query.strip():
	            # Nothing was asked: leave every widget as it is rather than
	            # sending an empty question to the model and recording an
	            # empty turn in the history.
	            return self.ai.chat_history, gr.update(), gr.update(), gr.update()
	        try:
	            _answer, summary, total_tokens, price = self.ai.answer_query(
	                query, file_obj, summarize, tone, model_name, creativity,
	                keywords, language, response_length, welcome_message, exclusion_words
	            )
	        except RuntimeError as e:
	            raise gr.Error(str(e)) from e
	        # The chatbot shows the full history, which already contains the
	        # new (question, answer) pair appended by answer_query.
	        return self.ai.chat_history, summary, total_tokens, price

	    def clear_chat(self):
	        """Reset the conversation and blank the summary/token/price widgets."""
	        self.ai.clear_history()
	        return self.ai.chat_history, "", 0, "0 دلار"

	    def launch(self):
	        """Start the Gradio server for the built interface."""
	        self.interface.launch()

	def main():
	    """Build the AI engine, wrap it in the Gradio UI and start serving."""
	    engine = AICore()
	    ChatInterface(engine).launch()


	# Run the app only when the file is executed as a script.
	if __name__ == "__main__":
	    main()