Batnini committed on
Commit
c6f9a1b
·
verified ·
1 Parent(s): b7c734d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -45
app.py CHANGED
@@ -1,51 +1,185 @@
1
"""Gradio front end for the Arabic tools.

Builds a three-tab interface (text generator, surah viewer, verse search)
and launches it when the module is executed.
"""
import gradio as gr
from tools.arabic_generator import ArabicTextGenerator
from tools.quran_search import QuranSearchEngine

text_gen = ArabicTextGenerator()
quran = QuranSearchEngine()

# Ask for the surah list once and reuse it for both the dropdown choices and
# its default value, instead of calling get_surahs() three times.
surah_choices = quran.get_surahs()
default_surah = surah_choices[0][1] if surah_choices else 1

with gr.Blocks(title="الأدوات العربية") as app:
    # Tab 1: Arabic text generator
    with gr.Tab("🖊️ مولد النصوص"):
        text_input = gr.Textbox(label="النص الأولي")
        # step=1 keeps the requested length a whole number.
        length_slider = gr.Slider(50, 300, value=100, step=1, label="طول النص")
        gen_btn = gr.Button("توليد")
        text_output = gr.Textbox(label="النص المولد", lines=6)

        gen_btn.click(
            text_gen.generate,
            inputs=[text_input, length_slider],
            outputs=text_output,
        )

    # Tab 2: Quran surah viewer
    with gr.Tab("📖 القرآن الكريم"):
        surah_dropdown = gr.Dropdown(
            label="اختر سورة",
            choices=surah_choices,
            value=default_surah,
        )
        show_btn = gr.Button("عرض السورة")
        quran_output = gr.Textbox(label="النص القرآني", lines=15)

        show_btn.click(
            quran.get_surah_text,
            inputs=surah_dropdown,
            outputs=quran_output,
        )

    # Tab 3: Quranic verse search
    with gr.Tab("🔍 بحث آيات قرآنية"):
        search_input = gr.Textbox(label="أدخل استعلام البحث (مثال: الرحمة والمغفرة)")
        # Without an explicit step Gradio derives a fractional step for a
        # 1-10 range, so the handler could receive a non-integer result
        # count; step=1 restricts the slider to whole numbers.
        top_k_slider = gr.Slider(1, 10, value=5, step=1, label="عدد النتائج")
        search_btn = gr.Button("بحث")
        search_output = gr.Textbox(label="الآيات ذات الصلة", lines=15)

        search_btn.click(
            quran.search_verses,
            inputs=[search_input, top_k_slider],
            outputs=search_output,
        )

app.launch()
 
 
1
+ import requests
2
+ import logging
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from config import QURAN_DATA_SOURCES, MODEL_NAME, CHUNK_SIZE
7
+ import time
8
 
9
class QuranSearchEngine:
    """Load the Quran text and search it by embedding similarity or keyword.

    Constructing an instance performs network I/O: it downloads the surah
    list and all 114 surahs from ``api_url``, falls back to the mirrors in
    ``QURAN_DATA_SOURCES`` when that fails, and then tries to embed every
    verse with the ``MODEL_NAME`` sentence-transformer.  Each stage degrades
    instead of raising, so an instance is always usable:

    * no Quran data -> a single placeholder verse and a built-in surah list;
    * no embeddings -> ``search_verses`` uses a keyword search.

    Attributes:
        surahs: list of surah dicts (each with an ``'id'``), or ``None`` when
            nothing could be downloaded.
        full_quran: list of surah dicts including their verses, or ``None``.
        all_verses: flat list of ``{'surah_id', 'verse_num', 'text'}`` dicts.
        verse_embeddings: array with one row per entry of ``all_verses``, or
            ``None`` when embeddings are unavailable.
        model: the loaded sentence-transformer, or ``None``.
    """

    _SURAH_PREFIX = "سورة "
    _NO_RESULTS = "لا توجد نتائج مطابقة."
    _DEFAULT_TOP_K = 5

    def __init__(self):
        self.api_url = "https://quranapi.pages.dev/api/"
        self.logger = logging.getLogger(__name__)
        self.full_quran = None
        self.surahs = None
        self.all_verses = []  # List of {'surah_id': int, 'verse_num': int, 'text': str}
        self.verse_embeddings = None
        self.model = None
        self._load_full_quran()
        self.logger.info("Surahs loaded: %d", len(self.surahs) if self.surahs else 0)
        self._load_all_verses_and_embeddings()
        self.logger.info("Verses loaded: %d", len(self.all_verses))

    def _load_full_quran(self):
        """Download surah metadata and every surah's verses from the API.

        The whole download is retried up to three times with exponential
        back-off.  Results are stored on the instance only after a complete
        download, so a failure part-way through never leaves a partial Quran
        behind.  When every attempt fails the fallback mirrors are tried.
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                # One session lets the 115 requests reuse a connection.
                with requests.Session() as session:
                    response = session.get(f"{self.api_url}surah.json", timeout=10)
                    response.raise_for_status()
                    surahs = response.json()  # Array of surah metadata (no 'id')
                    for number, surah in enumerate(surahs, start=1):
                        surah['id'] = number  # Add 'id' for consistency

                    full_quran = []
                    for surah_id in range(1, 115):
                        surah_response = session.get(
                            f"{self.api_url}{surah_id}.json", timeout=10
                        )
                        surah_response.raise_for_status()
                        surah_data = surah_response.json()
                        surah_data['id'] = surah_id  # Add 'id' for consistency
                        full_quran.append(surah_data)
            except Exception as e:
                # Best-effort boundary: any network/parsing problem means retry.
                self.logger.error(
                    f"Attempt {attempt + 1}/{max_retries} failed to fetch full Quran from API: {e}"
                )
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)
            else:
                self.surahs = surahs
                self.full_quran = full_quran
                return

        self._load_fallback_quran()

    def _load_fallback_quran(self):
        """Load the Quran from the first working ``QURAN_DATA_SOURCES`` entry.

        Each source is retried up to three times.  Loading stops at the first
        source that succeeds.  When all sources fail, ``surahs`` and
        ``full_quran`` are reset to ``None``; ``get_surahs`` then serves the
        built-in list and verse loading uses its placeholder.
        """
        max_retries = 3
        for source in QURAN_DATA_SOURCES:
            for attempt in range(max_retries):
                try:
                    response = requests.get(source, timeout=10)
                    response.raise_for_status()
                    data = response.json()  # Array of surah dicts
                except Exception as e:
                    self.logger.error(
                        f"Attempt {attempt + 1}/{max_retries} failed to fetch fallback from {source}: {e}"
                    )
                    if attempt < max_retries - 1:
                        time.sleep(2 ** attempt)
                else:
                    self.full_quran = data
                    self.surahs = data
                    return

        # Keep `surahs` dict-shaped-or-None: storing the built-in tuple list
        # here would break every lookup that treats entries as dicts.
        self.full_quran = None
        self.surahs = None

    def _load_all_verses_and_embeddings(self):
        """Flatten ``full_quran`` into ``all_verses`` and embed every verse.

        Two surah layouts are understood: the API layout (``'arabic1'`` is a
        list of verse strings) and the fallback layout (``'verses'`` is a list
        of dicts with ``'id'`` and ``'text'``).  ``all_verses`` is rebuilt
        from scratch, so calling this again does not duplicate verses.  If
        the model cannot be loaded or encoding fails, ``verse_embeddings`` is
        left as ``None``.
        """
        if not self.full_quran:
            self.logger.error("No full Quran loaded, skipping verse loading")
            self.all_verses = [
                {'surah_id': 1, 'verse_num': 1, 'text': "بِسْمِ ٱللَّهِ ٱلرَّحْمَـٰنِ ٱلرَّحِيمِ"},
            ]
            return

        all_verses = []
        for position, surah in enumerate(self.full_quran, start=1):
            if not isinstance(surah, dict):
                continue
            # Use the surah's position when it has no id, so surahs without
            # one are not all attributed to surah 1.
            surah_id = surah.get('id', position)
            if 'arabic1' in surah:  # API structure: verses as list of str
                for verse_num, text in enumerate(surah['arabic1'], start=1):
                    all_verses.append(
                        {'surah_id': surah_id, 'verse_num': verse_num, 'text': text}
                    )
            else:  # Fallback structure: verses as list of dict
                for verse in surah.get('verses', []):
                    all_verses.append({
                        'surah_id': surah_id,
                        'verse_num': verse.get('id', 1),
                        'text': verse.get('text', ''),
                    })
        self.all_verses = all_verses

        verse_texts = [v['text'] for v in all_verses]
        if not verse_texts:
            self.verse_embeddings = None
            return

        try:
            self.model = SentenceTransformer(MODEL_NAME)
            chunks = []
            for start in range(0, len(verse_texts), CHUNK_SIZE):
                chunk = verse_texts[start:start + CHUNK_SIZE]
                chunks.append(self.model.encode(chunk, prompt_name='retrieval.passage'))
            self.verse_embeddings = np.vstack(chunks)
        except Exception as e:
            # Embeddings are optional; searching degrades to keyword matching.
            self.logger.error(f"Failed to compute embeddings: {e}")
            self.verse_embeddings = None

    def get_surahs(self):
        """Return ``(display_name, surah_id)`` pairs for every known surah.

        Falls back to a short built-in list when no surah data was loaded.
        """
        if self.surahs:
            return [
                (s.get('surahNameArabicLong', s.get('name', '')), s['id'])
                for s in self.surahs
            ]
        return self._load_fallback_surahs()

    def get_surah_text(self, surah_id):
        """Return the text of one surah, one numbered verse per paragraph.

        The cached Quran is used when it contains the surah; otherwise the
        surah is fetched from the API with up to three attempts.  A
        placeholder text is returned when the id is not a number or every
        attempt fails.
        """
        try:
            surah_id = int(surah_id)
        except (TypeError, ValueError, OverflowError):
            # No point in hitting the network with an id like None or "abc".
            self.logger.error("Invalid surah id: %r", surah_id)
            return self._load_fallback_verse()

        if self.full_quran:
            try:
                surah = next((s for s in self.full_quran if s['id'] == surah_id), None)
                if surah:
                    return self._format_surah(surah)
            except (KeyError, TypeError) as e:
                self.logger.error(f"Error processing cached surah {surah_id}: {e}")

        max_retries = 3
        for attempt in range(max_retries):
            try:
                response = requests.get(f"{self.api_url}{surah_id}.json", timeout=10)
                response.raise_for_status()
                verses = response.json()['arabic1']
                return "\n\n".join(
                    f"آية {number}: {text}" for number, text in enumerate(verses, start=1)
                )
            except Exception as e:
                self.logger.error(
                    f"Attempt {attempt + 1}/{max_retries} failed to fetch surah {surah_id}: {e}"
                )
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)
        return self._load_fallback_verse()

    def search_verses(self, query, top_k=5):
        """Return the ``top_k`` verses most similar to ``query`` as text.

        Uses cosine similarity between the query embedding and the verse
        embeddings; when embeddings are unavailable the keyword search is
        used instead.  ``top_k`` may be a float (it is truncated); a blank
        query or a non-positive ``top_k`` yields the "no results" message.
        """
        if self.verse_embeddings is None or not self.all_verses:
            return self._keyword_fallback_search(query, top_k)

        limit = self._coerce_top_k(top_k)
        if limit == 0 or not query or not query.strip():
            # Also avoids `[-0:]`, which would select every verse.
            return self._NO_RESULTS

        try:
            query_embedding = self.model.encode([query], prompt_name='retrieval.query')
            similarities = cosine_similarity(query_embedding, self.verse_embeddings)[0]
            top_indices = np.argsort(similarities)[-limit:][::-1]  # best first
            surah_names = self._surah_names()
            return "\n\n".join(
                self._format_verse(self.all_verses[idx], surah_names)
                for idx in top_indices
            )
        except Exception:
            self.logger.exception("Search failed")
            return "حدث خطأ أثناء البحث. جرب مرة أخرى."

    def _keyword_fallback_search(self, query, top_k=5):
        """Return up to ``top_k`` verses that contain ``query`` as a substring.

        Both sides are normalized first (combining marks and tatweel removed,
        alef wasla folded to alef, case folded) so that a query typed without
        diacritics matches fully vocalized verse text.  The verses are shown
        in their original form.
        """
        limit = self._coerce_top_k(top_k)
        needle = self._normalize_for_match(query or "").strip()
        if limit == 0 or not needle:
            return self._NO_RESULTS

        surah_names = self._surah_names()
        matches = []
        for verse in self.all_verses:
            if needle in self._normalize_for_match(verse['text']):
                matches.append(self._format_verse(verse, surah_names))
                if len(matches) == limit:
                    break  # enough results; no need to scan the rest
        return "\n\n".join(matches) or self._NO_RESULTS

    def _load_fallback_surahs(self):
        """Return the built-in ``(name, id)`` list used when nothing loaded."""
        return [
            ("الفاتحة", 1),
            ("البقرة", 2),
            ("آل عمران", 3)
        ]

    def _load_fallback_verse(self):
        """Return the placeholder text shown when a surah cannot be loaded."""
        return "بسم الله الرحمن الرحيم\nالله لا إله إلا هو الحي القيوم"

    @classmethod
    def _coerce_top_k(cls, top_k):
        """Turn ``top_k`` into a non-negative int (UI sliders may send floats)."""
        try:
            return max(int(top_k), 0)
        except (TypeError, ValueError, OverflowError):
            return cls._DEFAULT_TOP_K

    @staticmethod
    def _normalize_for_match(text):
        """Return ``text`` reduced to a form suitable for substring matching."""
        import unicodedata

        decomposed = unicodedata.normalize("NFD", text)
        kept = "".join(
            ch for ch in decomposed
            # Drop combining marks (e.g. harakat) and tatweel (U+0640).
            if not unicodedata.combining(ch) and ch != "\u0640"
        )
        # Alef wasla (U+0671) -> plain alef (U+0627).
        return kept.replace("\u0671", "\u0627").casefold()

    @staticmethod
    def _format_surah(surah):
        """Render a surah dict (API or fallback layout) as numbered verses."""
        if 'arabic1' in surah:  # API
            return "\n\n".join(
                f"آية {number}: {text}"
                for number, text in enumerate(surah['arabic1'], start=1)
            )
        # Fallback
        return "\n\n".join(f"آية {v['id']}: {v['text']}" for v in surah['verses'])

    def _surah_names(self):
        """Map surah id -> display name, built once per search."""
        names = {}
        for surah in self.surahs or []:
            if isinstance(surah, dict) and 'id' in surah:
                # setdefault keeps the first entry when ids repeat.
                names.setdefault(
                    surah['id'],
                    surah.get('surahNameArabicLong', surah.get('name', '')),
                )
        return names

    def _format_verse(self, verse, surah_names):
        """Render one verse as a result line with surah name and verse number."""
        surah_id = verse['surah_id']
        name = surah_names.get(surah_id) or str(surah_id)
        # Avoid double "سورة"
        if name.startswith(self._SURAH_PREFIX):
            name = name[len(self._SURAH_PREFIX):]
        return f"{self._SURAH_PREFIX}{name} - آية {verse['verse_num']}:\n{verse['text']}"