File size: 3,923 Bytes
604fb6e b6f8046 46bb209 b6f8046 46bb209 ba71242 46bb209 ba71242 46bb209 481da23 46bb209 481da23 ba71242 823bbfd 110e6e2 823bbfd 604fb6e 823bbfd b6f8046 46bb209 b6f8046 46bb209 734afea 46bb209 b6f8046 46bb209 823bbfd 46bb209 481da23 734afea 481da23 46bb209 b6f8046 823bbfd 46bb209 481da23 823bbfd b6f8046 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# ✅ Arabic RAG System with Cascaded Memory Pipeline
# Step-by-step: Chunking -> Question -> Retrieval -> Summarization -> Final answer
import gradio as gr
from rag_pipeline import RAGPipeline
from utils import process_documents
# Single module-level pipeline instance; every handler below calls into it
# (index building, passage retrieval, summarization, answer generation).
rag = RAGPipeline()
def log_message(msg, logs):
    """Return ``logs`` with ``msg`` and a trailing newline appended.

    Args:
        msg: The line to add to the log.
        logs: The log text accumulated so far.

    Returns:
        A new string; callers rebind their log variable to the result.
    """
    return "".join((logs, msg, "\n"))
def upload_and_prepare(files, logs):
    """Chunk every uploaded file and build the retrieval index from the result.

    Args:
        files: Uploaded file objects, each exposing its path as ``.name``.
            May be ``None`` or empty when the user has not selected anything.
        logs: The log text accumulated so far.

    Returns:
        A 5-tuple ``(all_chunks, logs, update, update, update)``: the collected
        chunks, the extended log text, and three component updates for the
        buttons wired as outputs of this handler.
    """
    if not files:
        # Nothing to process: iterating ``None`` would raise TypeError, and an
        # index must not be rebuilt from zero chunks. Report it in the log and
        # leave the buttons as they are (argument-less update is a no-op).
        logs = log_message("[RAG] ⚠️ لم يتم رفع أي ملفات. يرجى اختيار ملف واحد على الأقل.", logs)
        return [], logs, gr.update(), gr.update(), gr.update()

    logs = log_message("[RAG] بدء معالجة الملفات...", logs)
    all_chunks = []
    for file in files:
        logs = log_message(f"[RAG] معالجة الملف: {file.name}", logs)
        chunks = process_documents(file.name)
        all_chunks.extend(chunks)
        logs = log_message(f"[RAG] تم استخراج {len(chunks)} مقطع من {file.name}", logs)

    # Index is built once, over the chunks of all files together.
    rag.build_index(all_chunks)
    logs = log_message(f"[RAG] تم بناء الفهرس بـ {len(all_chunks)} مقطع.", logs)
    return all_chunks, logs, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
def store_question(q):
    """Pass the question through unchanged so the UI can copy it into state."""
    stored = q
    return stored
def retrieve_passages(question, chunks, logs):
    """Ask the pipeline for the passages related to ``question``.

    Args:
        question: The question text to retrieve for.
        chunks: Accepted to match the UI wiring; not read by this function.
        logs: The log text accumulated so far.

    Returns:
        ``(passages, passages_text, logs)``: the retrieved passages, the same
        passages joined with a ``---`` separator line, and the extended log.
    """
    hits = rag.retrieve_passages(question)
    separator = "\n---\n"
    joined = separator.join(hits)
    found_msg = f"[RAG] تم العثور على {len(hits)} مقطع مرتبط بالسؤال."
    return hits, joined, log_message(found_msg, logs)
def generate_summary_only(passages_text, logs):
    """Summarize the retrieved passages and record the outcome in the log.

    Args:
        passages_text: The joined passage text to summarize.
        logs: The log text accumulated so far.

    Returns:
        ``(summary, logs)``. A summary that is empty or whitespace-only is
        logged as a warning; otherwise the summary itself is echoed into the
        log after a success line.
    """
    summary = rag.summarize_text(passages_text)
    if summary.strip():
        note = "[RAG] تم توليد الملخص بنجاح.\n📌 الملخص:\n" + summary
    else:
        note = "[RAG] ⚠️ لم يتم توليد ملخص. تحقق من محتوى المقاطع."
    return summary, log_message(note, logs)
def generate_final_answer(question, summary_text, logs):
    """Generate the final answer from the summary and record the outcome.

    Args:
        question: The question to answer.
        summary_text: The summary passed to the pipeline as its source text.
        logs: The log text accumulated so far.

    Returns:
        ``(answer, logs)``. An answer that is empty or whitespace-only is
        logged as a warning, anything else as success.
    """
    # The pipeline returns a pair; only its first element is used here.
    answer, _unused = rag.generate_answer_from_passages(question, summary_text)
    if answer.strip():
        note = "[RAG] ✅ تم توليد الإجابة النهائية."
    else:
        note = "[RAG] ⚠️ لم يتم توليد إجابة. ربما النص طويل أو النموذج فشل."
    return answer, log_message(note, logs)
# UI definition: one button per pipeline stage, so the user can step through
# upload -> store question -> retrieve -> summarize -> answer.
with gr.Blocks() as demo:
    # Per-session values handed from one stage to the next.
    chunks = gr.State([])
    question_state = gr.State("")
    retrieved_passages = gr.State([])

    gr.Markdown("# 🕌 نظام استرجاع المعرفة - مراحل متسلسلة")

    with gr.Row():
        files_input = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
        upload_btn = gr.Button("⬆️ رفع وتجهيز المقاطع")

    with gr.Row():
        question_input = gr.Textbox(label="❓ اكتب سؤالك")
        store_question_btn = gr.Button("📥 تخزين السؤال")
        find_btn = gr.Button("🔍 بحث عن المقاطع المرتبطة")

    summarize_btn = gr.Button("📝 توليد الملخص")
    answer_btn = gr.Button("✍️ توليد الإجابة النهائية")

    passage_output = gr.Textbox(label="📄 المقاطع المرتبطة", lines=8)
    summary_output = gr.Textbox(label="📌 الملخص المستخدم", lines=5)
    answer_output = gr.Textbox(label="✅ الإجابة النهائية", lines=5)
    # Starts as an explicit empty string so the first handler call receives a
    # str to append to rather than None.
    logs_output = gr.Textbox(label="📜 سجل العمليات", lines=10, interactive=False, value="")

    # Every handler appends to the log text it is given and returns the result.
    # The log box itself is used as that input: a separate State that is never
    # listed in any `outputs` would stay empty forever, and each click would
    # replace the visible log instead of extending it.
    upload_btn.click(
        upload_and_prepare,
        inputs=[files_input, logs_output],
        outputs=[chunks, logs_output, store_question_btn, find_btn, summarize_btn],
    )
    store_question_btn.click(store_question, inputs=question_input, outputs=question_state)
    find_btn.click(
        retrieve_passages,
        inputs=[question_state, chunks, logs_output],
        outputs=[retrieved_passages, passage_output, logs_output],
    )
    # Summarization reads the passage box, and answering reads the summary box.
    summarize_btn.click(
        generate_summary_only,
        inputs=[passage_output, logs_output],
        outputs=[summary_output, logs_output],
    )
    answer_btn.click(
        generate_final_answer,
        inputs=[question_state, summary_output, logs_output],
        outputs=[answer_output, logs_output],
    )

demo.launch()
|