File size: 3,923 Bytes
604fb6e
 
 
b6f8046
 
 
 
 
 
 
 
 
 
46bb209
b6f8046
 
 
 
 
 
 
 
 
46bb209
 
ba71242
46bb209
 
ba71242
46bb209
 
481da23
46bb209
481da23
ba71242
823bbfd
 
110e6e2
 
 
 
823bbfd
 
 
 
604fb6e
 
 
 
823bbfd
b6f8046
 
 
46bb209
 
 
 
 
b6f8046
 
46bb209
 
734afea
46bb209
 
 
b6f8046
46bb209
823bbfd
 
46bb209
481da23
734afea
481da23
46bb209
b6f8046
823bbfd
46bb209
481da23
823bbfd
 
b6f8046
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# ✅ Arabic RAG System with Cascaded Memory Pipeline
# Step-by-step: Chunking -> Question -> Retrieval -> Summarization -> Final answer

import gradio as gr
from rag_pipeline import RAGPipeline
from utils import process_documents

rag = RAGPipeline()

def log_message(msg, logs):
    """Append *msg* as a new line to the running log text and return the result.

    Args:
        msg: The message to append (no trailing newline needed).
        logs: The log text accumulated so far. ``None`` is treated as an
            empty log so an unset UI value does not raise ``TypeError``.

    Returns:
        The log text with ``msg`` and a trailing newline appended.
    """
    return (logs or "") + msg + "\n"

def upload_and_prepare(files, logs):
    """Chunk every uploaded file and build the retrieval index from the chunks.

    Args:
        files: The uploaded files from the file widget. Each item's ``name``
            attribute is passed to ``process_documents``. May be ``None`` or
            empty when the button is clicked before anything is uploaded.
        logs: The log text accumulated so far.

    Returns:
        A 5-tuple matching the click handler's outputs: the list of all
        chunks, the updated log text, and three ``gr.update(visible=True)``
        objects for the follow-up buttons.
    """
    if not files:
        # Clicking the button with no upload passes None; iterating it would
        # raise TypeError, so report the problem and leave the index alone.
        logs = log_message("[RAG] ⚠️ لم يتم رفع أي ملفات.", logs)
        return [], logs, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)

    logs = log_message("[RAG] بدء معالجة الملفات...", logs)
    all_chunks = []
    for file in files:
        logs = log_message(f"[RAG] معالجة الملف: {file.name}", logs)
        chunks = process_documents(file.name)
        all_chunks.extend(chunks)
        logs = log_message(f"[RAG] تم استخراج {len(chunks)} مقطع من {file.name}", logs)

    rag.build_index(all_chunks)
    logs = log_message(f"[RAG] تم بناء الفهرس بـ {len(all_chunks)} مقطع.", logs)
    return all_chunks, logs, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)

def store_question(q):
    """Hand the question back unchanged so the caller can store it."""
    stored = q
    return stored

def retrieve_passages(question, chunks, logs):
    """Ask the RAG pipeline for the passages related to *question*.

    Args:
        question: The question to look up.
        chunks: Accepted to match the click handler's inputs; not used here.
        logs: The log text accumulated so far.

    Returns:
        A 3-tuple: the passages as returned by the pipeline, the same
        passages joined with a ``---`` separator line, and the updated log.
    """
    hits = rag.retrieve_passages(question)
    joined = "\n---\n".join(hits)
    note = f"[RAG] تم العثور على {len(hits)} مقطع مرتبط بالسؤال."
    return hits, joined, log_message(note, logs)

def generate_summary_only(passages_text, logs):
    """Summarize the retrieved passages and record the outcome in the log.

    Args:
        passages_text: The text handed to the pipeline's summarizer.
        logs: The log text accumulated so far.

    Returns:
        A 2-tuple of the summary and the updated log. A blank summary is
        logged as a warning; otherwise the summary itself is logged.
    """
    summary = rag.summarize_text(passages_text)
    if summary.strip():
        entry = "[RAG] تم توليد الملخص بنجاح.\n📌 الملخص:\n" + summary
    else:
        entry = "[RAG] ⚠️ لم يتم توليد ملخص. تحقق من محتوى المقاطع."
    return summary, log_message(entry, logs)

def generate_final_answer(question, summary_text, logs):
    """Generate the final answer from the summary and record the outcome.

    Args:
        question: The question to answer.
        summary_text: The text passed to the pipeline as answer context.
        logs: The log text accumulated so far.

    Returns:
        A 2-tuple of the answer and the updated log. A blank answer is
        logged as a warning; otherwise a success line is logged.
    """
    # The pipeline returns a pair; only its first element is used here.
    answer, _unused = rag.generate_answer_from_passages(question, summary_text)
    entry = (
        "[RAG] ✅ تم توليد الإجابة النهائية."
        if answer.strip()
        else "[RAG] ⚠️ لم يتم توليد إجابة. ربما النص طويل أو النموذج فشل."
    )
    return answer, log_message(entry, logs)

# Build the UI: one button per pipeline stage, all sharing a single log box.
with gr.Blocks() as demo:
    # Per-session values handed from one stage to the next.
    chunks = gr.State([])
    question_state = gr.State("")
    retrieved_passages = gr.State([])

    gr.Markdown("# 🕌 نظام استرجاع المعرفة - مراحل متسلسلة")

    with gr.Row():
        files_input = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
        upload_btn = gr.Button("⬆️ رفع وتجهيز المقاطع")

    with gr.Row():
        question_input = gr.Textbox(label="❓ اكتب سؤالك")
        store_question_btn = gr.Button("📥 تخزين السؤال")

    find_btn = gr.Button("🔍 بحث عن المقاطع المرتبطة")
    summarize_btn = gr.Button("📝 توليد الملخص")
    answer_btn = gr.Button("✍️ توليد الإجابة النهائية")

    passage_output = gr.Textbox(label="📄 المقاطع المرتبطة", lines=8)
    summary_output = gr.Textbox(label="📌 الملخص المستخدم", lines=5)
    answer_output = gr.Textbox(label="✅ الإجابة النهائية", lines=5)
    # Starts as an empty string so the first handler receives "" rather than None.
    logs_output = gr.Textbox(label="📜 سجل العمليات", value="", lines=10, interactive=False)

    # The log textbox is both an input and an output of every handler, so each
    # stage appends to what earlier stages wrote. Reading the log from a
    # separate gr.State that no event writes back to would pass "" on every
    # click and wipe the visible history.
    upload_btn.click(
        upload_and_prepare,
        inputs=[files_input, logs_output],
        outputs=[chunks, logs_output, store_question_btn, find_btn, summarize_btn],
    )
    store_question_btn.click(store_question, inputs=question_input, outputs=question_state)
    find_btn.click(
        retrieve_passages,
        inputs=[question_state, chunks, logs_output],
        outputs=[retrieved_passages, passage_output, logs_output],
    )
    summarize_btn.click(
        generate_summary_only,
        inputs=[passage_output, logs_output],
        outputs=[summary_output, logs_output],
    )
    answer_btn.click(
        generate_final_answer,
        inputs=[question_state, summary_output, logs_output],
        outputs=[answer_output, logs_output],
    )

demo.launch()