# ✅ Arabic RAG System with Cascaded Memory Pipeline
# Step-by-step: Chunking -> Question -> Retrieval -> Summarization
import gradio as gr

from rag_pipeline import RAGPipeline
from utils import process_documents

rag = RAGPipeline()


def log_message(msg, logs):
    """Return ``logs`` with ``msg`` and a trailing newline appended.

    A ``None`` log is treated as an empty string so the first message can be
    appended even when the log source has no value yet.
    """
    return (logs or "") + msg + "\n"


def upload_and_prepare(files, logs):
    """Chunk every uploaded file and build the retrieval index from the chunks.

    Args:
        files: Uploaded files from the file widget. Each item may be an object
            exposing ``.name`` or a plain path string. ``None`` or an empty
            list means nothing was uploaded.
        logs: Log text accumulated so far.

    Returns:
        A 5-tuple: the list of all chunks, the updated log text, and three
        ``gr.update(visible=True)`` values for the follow-up buttons.
    """
    if not files:
        # Clicking the button with no upload passes None; iterating it would
        # raise TypeError, so report the problem in the log instead.
        logs = log_message("[RAG] ⚠️ لم يتم رفع أي ملفات.", logs)
        return (
            [],
            logs,
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
        )

    logs = log_message("[RAG] بدء معالجة الملفات...", logs)
    all_chunks = []
    for file in files:
        # Fall back to the item itself when it has no ``.name`` attribute.
        path = getattr(file, "name", file)
        logs = log_message(f"[RAG] معالجة الملف: {path}", logs)
        chunks = process_documents(path)
        all_chunks.extend(chunks)
        logs = log_message(
            f"[RAG] تم استخراج {len(chunks)} مقطع من {path}", logs
        )

    rag.build_index(all_chunks)
    logs = log_message(
        f"[RAG] تم بناء الفهرس بـ {len(all_chunks)} مقطع.", logs
    )
    return (
        all_chunks,
        logs,
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
    )


def store_question(q):
    """Return the question unchanged so it can be written into the state."""
    return q


def retrieve_passages(question, chunks, logs):
    """Retrieve passages for ``question`` from the index held by ``rag``.

    Args:
        question: The stored question text.
        chunks: Unused here; kept because the UI wiring passes it.
        logs: Log text accumulated so far.

    Returns:
        A 3-tuple: the retrieved passages, the passages joined with
        ``"\\n---\\n"``, and the updated log text.
    """
    passages = rag.retrieve_passages(question)
    passages_text = "\n---\n".join(passages)
    logs = log_message(
        f"[RAG] تم العثور على {len(passages)} مقطع مرتبط بالسؤال.", logs
    )
    return passages, passages_text, logs


def generate_summary_only(passages_text, logs):
    """Summarize the retrieved passages and log the outcome.

    Returns:
        A 2-tuple: the summary as returned by ``rag.summarize_text`` and the
        updated log text.
    """
    summary = rag.summarize_text(passages_text)
    # A None result is treated like an empty one instead of raising
    # AttributeError on ``.strip()``.
    if not (summary or "").strip():
        logs = log_message(
            "[RAG] ⚠️ لم يتم توليد ملخص. تحقق من محتوى المقاطع.", logs
        )
    else:
        logs = log_message(
            "[RAG] تم توليد الملخص بنجاح.\n📌 الملخص:\n" + summary, logs
        )
    return summary, logs


def generate_final_answer(question, summary_text, logs):
    """Generate the final answer from the question and the summary text.

    Returns:
        A 2-tuple: the answer (first element of the pair returned by
        ``rag.generate_answer_from_passages``) and the updated log text.
    """
    answer, _ = rag.generate_answer_from_passages(question, summary_text)
    # A None result is treated like an empty one instead of raising
    # AttributeError on ``.strip()``.
    if not (answer or "").strip():
        logs = log_message(
            "[RAG] ⚠️ لم يتم توليد إجابة. ربما النص طويل أو النموذج فشل.", logs
        )
    else:
        logs = log_message("[RAG] ✅ تم توليد الإجابة النهائية.", logs)
    return answer, logs


with gr.Blocks() as demo:
    chunks = gr.State([])
    question_state = gr.State("")
    retrieved_passages = gr.State([])

    gr.Markdown("# 🕌 نظام استرجاع المعرفة - مراحل متسلسلة")

    # NOTE(review): the original indentation was lost, so which components sit
    # inside each Row is a reconstruction — confirm against the intended layout.
    with gr.Row():
        files_input = gr.File(
            file_types=[".pdf", ".docx", ".txt"], file_count="multiple"
        )
        upload_btn = gr.Button("⬆️ رفع وتجهيز المقاطع")

    with gr.Row():
        question_input = gr.Textbox(label="❓ اكتب سؤالك")
        store_question_btn = gr.Button("📥 تخزين السؤال")

    find_btn = gr.Button("🔍 بحث عن المقاطع المرتبطة")
    summarize_btn = gr.Button("📝 توليد الملخص")
    answer_btn = gr.Button("✍️ توليد الإجابة النهائية")

    passage_output = gr.Textbox(label="📄 المقاطع المرتبطة", lines=8)
    summary_output = gr.Textbox(label="📌 الملخص المستخدم", lines=5)
    answer_output = gr.Textbox(label="✅ الإجابة النهائية", lines=5)
    logs_output = gr.Textbox(label="📜 سجل العمليات", lines=10, interactive=False)

    # The log textbox is both the input and the output of every handler, so
    # each step appends to what is already shown. Reading the log from a
    # separate State that no handler wrote back to meant every click started
    # from an empty log.
    upload_btn.click(
        upload_and_prepare,
        inputs=[files_input, logs_output],
        outputs=[chunks, logs_output, store_question_btn, find_btn, summarize_btn],
    )
    store_question_btn.click(
        store_question, inputs=question_input, outputs=question_state
    )
    find_btn.click(
        retrieve_passages,
        inputs=[question_state, chunks, logs_output],
        outputs=[retrieved_passages, passage_output, logs_output],
    )
    summarize_btn.click(
        generate_summary_only,
        inputs=[passage_output, logs_output],
        outputs=[summary_output, logs_output],
    )
    answer_btn.click(
        generate_final_answer,
        inputs=[question_state, summary_output, logs_output],
        outputs=[answer_output, logs_output],
    )

demo.launch()