ramy2018 committed on
Commit
46bb209
·
verified ·
1 Parent(s): d854811

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -32
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import gradio as gr
2
  from rag_pipeline import RAGPipeline
3
  from utils import process_documents
@@ -8,7 +11,7 @@ def log_message(msg, logs):
8
  logs = logs + msg + "\n"
9
  return logs
10
 
11
- def upload_and_index(files, logs):
12
  logs = log_message("[RAG] بدء معالجة الملفات...", logs)
13
  all_chunks = []
14
  for file in files:
@@ -18,48 +21,48 @@ def upload_and_index(files, logs):
18
  logs = log_message(f"[RAG] تم استخراج {len(chunks)} مقطع من {file.name}", logs)
19
 
20
  rag.build_index(all_chunks)
21
- logs = log_message("[RAG] تم بناء الفهرس.", logs)
22
-
23
- # Summarize only first N chunks for performance
24
- logs = log_message("[RAG] بدأ التلخيص التلقائي لأول 20 مقطع...", logs)
25
- rag.summarize_all_chunks(max_chunks=20)
26
- logs = log_message("[RAG] تم التلخيص لجميع المقاطع المحددة.", logs)
27
-
28
- return logs, gr.update(visible=True), gr.update(visible=True)
29
-
30
- def answer_question(question, logs):
31
- logs = log_message(f"[RAG] استلام السؤال: {question}", logs)
32
-
33
- answer, sources, combined_summary = rag.answer(question)
34
 
35
- if not combined_summary.strip():
36
- logs = log_message("[RAG] لم يتم إنشاء ملخص صالح.", logs)
37
- return "", logs
38
 
39
- if not answer.strip():
40
- logs = log_message("[RAG] لم يتم توليد إجابة. حاول صياغة السؤال بشكل أوضح.", logs)
41
- else:
42
- logs = log_message("[RAG] تم توليد الإجابة بنجاح.", logs)
43
- logs = log_message(f"[RAG] المقاطع المستخدمة: {sources}", logs)
44
 
45
- return answer, combined_summary, logs
 
 
 
46
 
47
  with gr.Blocks() as demo:
48
  logs = gr.State("")
49
- gr.Markdown("# 🕌 نظام استرجاع المعرفة باللغة العربية")
 
 
 
 
50
 
51
  with gr.Row():
52
- files_input = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple", label="📂 رفع الملفات")
53
- upload_btn = gr.Button("🔄 رفع وبناء الفهرس وتلخيص")
54
 
55
- question_input = gr.Textbox(label="❓ اكتب سؤالك هنا", visible=False)
56
- answer_btn = gr.Button("✍️ أجب عن السؤال", visible=False)
 
57
 
58
- logs_output = gr.Textbox(label="📜 سجل العمليات", lines=10, interactive=False)
 
 
 
59
  summary_output = gr.Textbox(label="📌 الملخص المستخدم", lines=5)
60
- answer_output = gr.Textbox(label=" الإجابة النهائية", lines=5)
61
 
62
- upload_btn.click(upload_and_index, inputs=[files_input, logs], outputs=[logs_output, question_input, answer_btn])
63
- answer_btn.click(answer_question, inputs=[question_input, logs], outputs=[answer_output, summary_output, logs_output])
 
 
64
 
65
  demo.launch()
 
1
+ # ✅ Arabic RAG System with Cascaded Memory Pipeline
2
+ # Step-by-step: Chunking -> Question -> Retrieval -> Summarization
3
+
4
  import gradio as gr
5
  from rag_pipeline import RAGPipeline
6
  from utils import process_documents
 
11
  logs = logs + msg + "\n"
12
  return logs
13
 
14
+ def upload_and_prepare(files, logs):
15
  logs = log_message("[RAG] بدء معالجة الملفات...", logs)
16
  all_chunks = []
17
  for file in files:
 
21
  logs = log_message(f"[RAG] تم استخراج {len(chunks)} مقطع من {file.name}", logs)
22
 
23
  rag.build_index(all_chunks)
24
+ logs = log_message(f"[RAG] تم بناء الفهرس بـ {len(all_chunks)} مقطع.", logs)
25
+ return all_chunks, logs, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
26
 
27
+ def store_question(q):
28
+ return q
 
29
 
30
+ def retrieve_passages(question, chunks, logs):
31
+ passages = rag.retrieve_passages(question)
32
+ logs = log_message(f"[RAG] تم العثور على {len(passages)} مقطع مرتبط بالسؤال.", logs)
33
+ return passages, logs
 
34
 
35
+ def summarize_answer(question, passages, logs):
36
+ answer, summary = rag.generate_answer_from_passages(question, passages)
37
+ logs = log_message("[RAG] تم توليد الإجابة النهائية.", logs)
38
+ return answer, summary, logs
39
 
40
  with gr.Blocks() as demo:
41
  logs = gr.State("")
42
+ chunks = gr.State([])
43
+ question_state = gr.State("")
44
+ retrieved_passages = gr.State([])
45
+
46
+ gr.Markdown("# 🕌 نظام استرجاع المعرفة - مراحل متسلسلة")
47
 
48
  with gr.Row():
49
+ files_input = gr.File(file_types=[".pdf", ".docx", ".txt"], file_count="multiple")
50
+ upload_btn = gr.Button("⬆️ رفع وتجهيز المقاطع")
51
 
52
+ with gr.Row():
53
+ question_input = gr.Textbox(label=" اكتب سؤالك")
54
+ store_question_btn = gr.Button("📥 تخزين السؤال")
55
 
56
+ find_btn = gr.Button("🔍 بحث عن المقاطع المرتبطة")
57
+ answer_btn = gr.Button("✍️ تلخيص وتوليد الإجابة")
58
+
59
+ answer_output = gr.Textbox(label="✅ الإجابة", lines=5)
60
  summary_output = gr.Textbox(label="📌 الملخص المستخدم", lines=5)
61
+ logs_output = gr.Textbox(label="📜 سجل العمليات", lines=10, interactive=False)
62
 
63
+ upload_btn.click(upload_and_prepare, inputs=[files_input, logs], outputs=[chunks, logs_output, store_question_btn, find_btn, answer_btn])
64
+ store_question_btn.click(store_question, inputs=question_input, outputs=question_state)
65
+ find_btn.click(retrieve_passages, inputs=[question_state, chunks, logs], outputs=[retrieved_passages, logs_output])
66
+ answer_btn.click(summarize_answer, inputs=[question_state, retrieved_passages, logs], outputs=[answer_output, summary_output, logs_output])
67
 
68
  demo.launch()