# pope30 / app.py
# Last commit: "Update app.py" by ramy2018 (110e6e2, verified)
# ✅ Arabic RAG System with Cascaded Memory Pipeline
# Step-by-step: Chunking -> Question -> Retrieval -> Summarization
import gradio as gr
from rag_pipeline import RAGPipeline
from utils import process_documents
# Module-level pipeline instance used by every handler in this file
# (index building, retrieval, summarization and answer generation).
rag = RAGPipeline()
def log_message(msg, logs):
    """Append *msg* as a new line to the running log text.

    Args:
        msg: The message to append.
        logs: The log text accumulated so far. ``None`` is treated as an
            empty log so a UI component with no value yet does not raise.

    Returns:
        The log text with ``msg`` and a trailing newline appended.
    """
    # A component that has never been written to may hand us None instead of "".
    return (logs or "") + msg + "\n"
def upload_and_prepare(files, logs):
    """Chunk every uploaded file and build the retrieval index from the chunks.

    Args:
        files: The value of the multi-file upload component. Entries may be
            file-like objects exposing ``.name`` or plain path strings,
            depending on the Gradio version. ``None`` or an empty list means
            nothing was uploaded.
        logs: The log text accumulated so far.

    Returns:
        A 5-tuple: the list of all extracted chunks, the updated log text,
        and three component updates for the buttons wired as outputs.
    """
    if not files:
        # Nothing to process: report it and leave the index and buttons as-is
        # instead of crashing while iterating over None.
        logs = log_message("[RAG] ⚠️ لم يتم رفع أي ملفات.", logs)
        return [], logs, gr.update(), gr.update(), gr.update()

    logs = log_message("[RAG] بدء معالجة الملفات...", logs)
    all_chunks = []
    for file in files:
        # Accept both tempfile-like wrappers (``.name``) and plain str paths.
        path = getattr(file, "name", file)
        logs = log_message(f"[RAG] معالجة الملف: {path}", logs)
        chunks = process_documents(path)
        all_chunks.extend(chunks)
        logs = log_message(f"[RAG] تم استخراج {len(chunks)} مقطع من {path}", logs)

    rag.build_index(all_chunks)
    logs = log_message(f"[RAG] تم بناء الفهرس بـ {len(all_chunks)} مقطع.", logs)
    return (
        all_chunks,
        logs,
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )
def store_question(q):
    """Return the question unchanged so the caller can store it as state."""
    stored_question = q
    return stored_question
def retrieve_passages(question, chunks, logs):
    """Fetch the passages related to *question* from the shared pipeline.

    Args:
        question: The question text to search for.
        chunks: Accepted for the event wiring; not used by this function.
        logs: The log text accumulated so far.

    Returns:
        A 3-tuple: the retrieved passages, the same passages joined into one
        string with ``---`` separator lines, and the updated log text.
    """
    hits = rag.retrieve_passages(question)
    joined = "\n---\n".join(hits)
    note = f"[RAG] تم العثور على {len(hits)} مقطع مرتبط بالسؤال."
    return hits, joined, log_message(note, logs)
def generate_summary_only(passages_text, logs):
    """Summarize the retrieved passages and log whether a summary was produced.

    Args:
        passages_text: The joined passage text to summarize.
        logs: The log text accumulated so far.

    Returns:
        A 2-tuple: the summary text and the updated log text.
    """
    summary = rag.summarize_text(passages_text)
    if summary.strip():
        # Success: include the summary itself in the log entry.
        note = "[RAG] تم توليد الملخص بنجاح.\n📌 الملخص:\n" + summary
    else:
        # A blank or whitespace-only summary counts as a failure.
        note = "[RAG] ⚠️ لم يتم توليد ملخص. تحقق من محتوى المقاطع."
    return summary, log_message(note, logs)
def generate_final_answer(question, summary_text, logs):
    """Generate the final answer from the summary and log the outcome.

    Args:
        question: The stored question text.
        summary_text: The summary passed to the pipeline alongside the question.
        logs: The log text accumulated so far.

    Returns:
        A 2-tuple: the answer text and the updated log text.
    """
    # The pipeline returns a pair; only the first element is used here.
    answer, _unused = rag.generate_answer_from_passages(question, summary_text)
    note = (
        "[RAG] ✅ تم توليد الإجابة النهائية."
        if answer.strip()
        else "[RAG] ⚠️ لم يتم توليد إجابة. ربما النص طويل أو النموذج فشل."
    )
    return answer, log_message(note, logs)
# UI definition: one button per pipeline stage
# (upload -> store question -> retrieve -> summarize -> answer).
with gr.Blocks() as demo:
    # Per-session values passed between the stage handlers.
    chunks = gr.State([])
    question_state = gr.State("")
    retrieved_passages = gr.State([])

    gr.Markdown("# 🕌 نظام استرجاع المعرفة - مراحل متسلسلة")

    with gr.Row():
        files_input = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
        upload_btn = gr.Button("⬆️ رفع وتجهيز المقاطع")

    # NOTE(review): the original indentation was lost, so which components
    # belong inside this row is an assumption — confirm the intended layout.
    with gr.Row():
        question_input = gr.Textbox(label="❓ اكتب سؤالك")
        store_question_btn = gr.Button("📥 تخزين السؤال")

    find_btn = gr.Button("🔍 بحث عن المقاطع المرتبطة")
    summarize_btn = gr.Button("📝 توليد الملخص")
    answer_btn = gr.Button("✍️ توليد الإجابة النهائية")

    passage_output = gr.Textbox(label="📄 المقاطع المرتبطة", lines=8)
    summary_output = gr.Textbox(label="📌 الملخص المستخدم", lines=5)
    answer_output = gr.Textbox(label="✅ الإجابة النهائية", lines=5)
    # Explicit empty value so the first handler receives "" rather than None.
    logs_output = gr.Textbox(label="📜 سجل العمليات", value="", lines=10, interactive=False)

    # The log textbox is both the input and the output of every handler.
    # Previously the handlers read a separate State that no event ever wrote
    # to, so each click started from an empty log and overwrote the last one.
    upload_btn.click(
        upload_and_prepare,
        inputs=[files_input, logs_output],
        outputs=[chunks, logs_output, store_question_btn, find_btn, summarize_btn],
    )
    store_question_btn.click(store_question, inputs=question_input, outputs=question_state)
    find_btn.click(
        retrieve_passages,
        inputs=[question_state, chunks, logs_output],
        outputs=[retrieved_passages, passage_output, logs_output],
    )
    summarize_btn.click(
        generate_summary_only,
        inputs=[passage_output, logs_output],
        outputs=[summary_output, logs_output],
    )
    answer_btn.click(
        generate_final_answer,
        inputs=[question_state, summary_output, logs_output],
        outputs=[answer_output, logs_output],
    )

demo.launch()