Update tools/quran_search.py
Browse files- tools/quran_search.py +3 -108
tools/quran_search.py
CHANGED
@@ -15,10 +15,13 @@ class QuranSearchEngine:
|
|
15 |
self.all_verses = [] # List of {'surah_id': int, 'verse_num': int, 'text': str}
|
16 |
self.verse_embeddings = None
|
17 |
self.model = None
|
|
|
18 |
self._load_full_quran()
|
19 |
print(f"Surahs loaded: {len(self.surahs) if self.surahs else 0}") # Debug
|
20 |
self._load_all_verses_and_embeddings()
|
21 |
print(f"Verses loaded: {len(self.all_verses)}") # Debug
|
|
|
|
|
22 |
|
23 |
def _load_full_quran(self):
|
24 |
max_retries = 3
|
@@ -77,111 +80,3 @@ class QuranSearchEngine:
|
|
77 |
self.all_verses.append({
|
78 |
'surah_id': surah_id,
|
79 |
'verse_num': verse_num,
|
80 |
-
'text': text
|
81 |
-
})
|
82 |
-
else: # Fallback structure: verses as list of dict
|
83 |
-
verses = surah.get('verses', [])
|
84 |
-
for verse in verses:
|
85 |
-
verse_num = verse.get('id', 1)
|
86 |
-
text = verse.get('text', '')
|
87 |
-
self.all_verses.append({
|
88 |
-
'surah_id': surah_id,
|
89 |
-
'verse_num': verse_num,
|
90 |
-
'text': text
|
91 |
-
})
|
92 |
-
|
93 |
-
try:
|
94 |
-
self.model = SentenceTransformer(MODEL_NAME)
|
95 |
-
verse_texts = ["passage: " + v['text'] for v in self.all_verses] # Add prefix for e5 model
|
96 |
-
self.verse_embeddings = []
|
97 |
-
for i in range(0, len(verse_texts), CHUNK_SIZE):
|
98 |
-
chunk = verse_texts[i:i + CHUNK_SIZE]
|
99 |
-
embeddings = self.model.encode(chunk, convert_to_tensor=False)
|
100 |
-
self.verse_embeddings.append(embeddings)
|
101 |
-
self.verse_embeddings = np.vstack(self.verse_embeddings)
|
102 |
-
except Exception as e:
|
103 |
-
self.logger.error(f"Failed to compute embeddings: {e}")
|
104 |
-
self.verse_embeddings = None
|
105 |
-
|
106 |
-
def get_surahs(self):
    """Return the known surahs as a list of ``(name, id)`` tuples.

    The name of each entry is its ``surahNameArabicLong`` value, falling
    back to its ``name`` value and finally to an empty string. When
    ``self.surahs`` is empty or unset, the result of
    ``self._load_fallback_surahs()`` is returned instead.
    """
    # Nothing loaded: defer entirely to the fallback list.
    if not self.surahs:
        return self._load_fallback_surahs()

    pairs = []
    for entry in self.surahs:
        label = entry.get('surahNameArabicLong', entry.get('name', ''))
        pairs.append((label, entry['id']))
    return pairs
|
113 |
-
|
114 |
-
def get_surah_text(self, surah_id):
    """Return the text of one surah, one numbered verse per paragraph.

    The cached ``self.full_quran`` list is consulted first. An entry with an
    ``arabic1`` key is treated as a list of verse strings numbered from 1;
    otherwise its ``verses`` list of ``{'id', 'text'}`` dicts is used. If the
    cache is empty, has no matching entry, or raises while being processed
    (the error is logged), the surah is fetched from
    ``f"{self.api_url}{surah_id}.json"`` with up to three attempts and an
    exponentially growing pause between them. After the final failed
    attempt the result of ``self._load_fallback_verse()`` is returned.
    """
    if self.full_quran:
        try:
            cached = next(
                (entry for entry in self.full_quran if entry['id'] == surah_id),
                None,
            )
            if cached:
                if 'arabic1' in cached:  # API-shaped entry: plain list of strings
                    return "\n\n".join(
                        f"آية {number}: {line}"
                        for number, line in enumerate(cached['arabic1'], start=1)
                    )
                # Fallback-shaped entry: list of dicts carrying their own ids
                return "\n\n".join(
                    f"آية {item['id']}: {item['text']}" for item in cached['verses']
                )
        except Exception as e:
            # Best effort: a broken cache entry falls through to the network path.
            self.logger.error(f"Error processing cached surah {surah_id}: {e}")

    max_retries = 3
    for attempt_no in range(1, max_retries + 1):
        try:
            response = requests.get(f"{self.api_url}{surah_id}.json", timeout=10)
            response.raise_for_status()
            payload = response.json()
            return "\n\n".join(
                f"آية {number}: {line}"
                for number, line in enumerate(payload['arabic1'], start=1)
            )
        except Exception as e:
            self.logger.error(f"Attempt {attempt_no}/{max_retries} failed to fetch surah {surah_id}: {e}")
            if attempt_no == max_retries:
                return self._load_fallback_verse()
            # Back off 1s, 2s, ... before the next attempt.
            time.sleep(2 ** (attempt_no - 1))
|
141 |
-
|
142 |
-
def search_verses(self, query, top_k=5):
    """Return the ``top_k`` verses most similar to ``query`` as display text.

    When no embeddings or no verses are loaded, the call is delegated to
    ``self._keyword_fallback_search(query, top_k)``. Otherwise the query is
    encoded with ``self.model``, compared against ``self.verse_embeddings``
    by cosine similarity, and the best-scoring verses are formatted as
    ``"سورة <name> - آية <n>:\\n<text>"`` blocks joined by blank lines.

    Args:
        query: Search text. Queries of at most one word are wrapped in a
            short Arabic phrase ("meaning of the word ...") to give the
            encoder more context.
        top_k: Maximum number of verses to return. Values of zero or less
            yield the "no relevant results" message.

    Returns:
        The formatted results, the "no relevant results" message when
        nothing is selected, or a generic error message if anything in the
        semantic search raises (the error is logged).
    """
    if self.verse_embeddings is None or not self.all_verses:
        return self._keyword_fallback_search(query, top_k)

    try:
        # ``[-top_k:]`` with top_k == 0 is ``[-0:]``, i.e. the WHOLE array,
        # and negative values select an arbitrary tail; neither is a
        # meaningful request, so answer with "no results" up front.
        if top_k <= 0:
            return "لا توجد نتائج ذات صلة."

        # Add context for single words or short queries
        if len(query.split()) <= 1:
            query = f"معنى كلمة {query}"  # Add context: "meaning of the word"
        query_embedding = self.model.encode(["query: " + query], convert_to_tensor=False)
        similarities = cosine_similarity(query_embedding, self.verse_embeddings)[0]
        # argsort is ascending: take the last top_k and reverse for best-first.
        top_indices = np.argsort(similarities)[-top_k:][::-1]

        # Tolerate an unset surah list: names then fall back to the numeric id.
        surahs = self.surahs or ()
        results = []
        for idx in top_indices:
            verse = self.all_verses[idx]
            surah_name = next(
                (
                    s.get('surahNameArabicLong', s.get('name', ''))
                    for s in surahs
                    if s['id'] == verse['surah_id']
                ),
                f"سورة {verse['surah_id']}",
            )
            # Strip an existing "سورة " prefix so it is not printed twice.
            if surah_name.startswith("سورة "):
                surah_name = surah_name[len("سورة "):]
            results.append(
                f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
            )
        return "\n\n".join(results) or "لا توجد نتائج ذات صلة."
    except Exception as e:
        self.logger.error(f"Search failed: {e}")
        return "حدث خطأ أثناء البحث. جرب مرة أخرى."
|
167 |
-
|
168 |
-
def _keyword_fallback_search(self, query, top_k=5):
    """Return up to ``top_k`` verses whose text contains ``query``.

    Matching is a case-insensitive substring test against each entry of
    ``self.all_verses``, in list order. Each hit is formatted as
    ``"سورة <name> - آية <n>:\\n<text>"`` and hits are joined by blank lines.

    Args:
        query: Text to look for. A blank or whitespace-only query returns
            the "no matching results" message instead of matching every
            verse.
        top_k: Maximum number of verses to return; zero or less returns the
            "no matching results" message.

    Returns:
        The formatted matches, or the "no matching results" message.
    """
    no_match = "لا توجد نتائج مطابقة."
    # An empty needle is a substring of every verse; treat it as no query.
    if top_k <= 0 or not query.strip():
        return no_match

    query_lower = query.lower()
    # The surah list may be unset if loading failed; names then fall back
    # to the numeric id instead of raising on iteration.
    surahs = self.surahs or ()
    matches = []
    for verse in self.all_verses:
        if query_lower not in verse['text'].lower():
            continue
        surah_name = next(
            (
                s.get('surahNameArabicLong', s.get('name', ''))
                for s in surahs
                if s['id'] == verse['surah_id']
            ),
            f"سورة {verse['surah_id']}",
        )
        # Strip an existing "سورة " prefix so it is not printed twice.
        if surah_name.startswith("سورة "):
            surah_name = surah_name[len("سورة "):]
        matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
        # Stop scanning once enough hits are collected.
        if len(matches) >= top_k:
            break
    return "\n\n".join(matches) or no_match
|
178 |
-
|
179 |
-
def _load_fallback_surahs(self):
    """Return a fixed list of ``(name, id)`` tuples for the first three surahs."""
    names = ("الفاتحة", "البقرة", "آل عمران")
    # Ids are simply the 1-based positions of the names above.
    return [(name, number) for number, name in enumerate(names, start=1)]
|
185 |
-
|
186 |
-
def _load_fallback_verse(self):
    """Return a fixed two-line Arabic text used when a surah cannot be fetched."""
    fallback_text = "بسم الله الرحمن الرحيم\nالله لا إله إلا هو الحي القيوم"
    return fallback_text
|
|
|
15 |
self.all_verses = [] # List of {'surah_id': int, 'verse_num': int, 'text': str}
|
16 |
self.verse_embeddings = None
|
17 |
self.model = None
|
18 |
+
print("Starting QuranSearchEngine initialization...") # Debug
|
19 |
self._load_full_quran()
|
20 |
print(f"Surahs loaded: {len(self.surahs) if self.surahs else 0}") # Debug
|
21 |
self._load_all_verses_and_embeddings()
|
22 |
print(f"Verses loaded: {len(self.all_verses)}") # Debug
|
23 |
+
if not self.model:
|
24 |
+
self.logger.error("Model initialization failed, using fallback behavior")
|
25 |
|
26 |
def _load_full_quran(self):
|
27 |
max_retries = 3
|
|
|
80 |
self.all_verses.append({
|
81 |
'surah_id': surah_id,
|
82 |
'verse_num': verse_num,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|