Batnini committed on
Commit
e962d6e
·
verified ·
1 Parent(s): c6f9a1b

Update tools/quran_search.py

Browse files
Files changed (1) hide show
  1. tools/quran_search.py +3 -108
tools/quran_search.py CHANGED
@@ -15,10 +15,13 @@ class QuranSearchEngine:
15
  self.all_verses = [] # List of {'surah_id': int, 'verse_num': int, 'text': str}
16
  self.verse_embeddings = None
17
  self.model = None
 
18
  self._load_full_quran()
19
  print(f"Surahs loaded: {len(self.surahs) if self.surahs else 0}") # Debug
20
  self._load_all_verses_and_embeddings()
21
  print(f"Verses loaded: {len(self.all_verses)}") # Debug
 
 
22
 
23
  def _load_full_quran(self):
24
  max_retries = 3
@@ -77,111 +80,3 @@ class QuranSearchEngine:
77
  self.all_verses.append({
78
  'surah_id': surah_id,
79
  'verse_num': verse_num,
80
- 'text': text
81
- })
82
- else: # Fallback structure: verses as list of dict
83
- verses = surah.get('verses', [])
84
- for verse in verses:
85
- verse_num = verse.get('id', 1)
86
- text = verse.get('text', '')
87
- self.all_verses.append({
88
- 'surah_id': surah_id,
89
- 'verse_num': verse_num,
90
- 'text': text
91
- })
92
-
93
- try:
94
- self.model = SentenceTransformer(MODEL_NAME)
95
- verse_texts = ["passage: " + v['text'] for v in self.all_verses] # Add prefix for e5 model
96
- self.verse_embeddings = []
97
- for i in range(0, len(verse_texts), CHUNK_SIZE):
98
- chunk = verse_texts[i:i + CHUNK_SIZE]
99
- embeddings = self.model.encode(chunk, convert_to_tensor=False)
100
- self.verse_embeddings.append(embeddings)
101
- self.verse_embeddings = np.vstack(self.verse_embeddings)
102
- except Exception as e:
103
- self.logger.error(f"Failed to compute embeddings: {e}")
104
- self.verse_embeddings = None
105
-
106
- def get_surahs(self):
107
- if self.surahs:
108
- return [
109
- (s.get('surahNameArabicLong', s.get('name', '')), s['id'])
110
- for s in self.surahs
111
- ]
112
- return self._load_fallback_surahs()
113
-
114
- def get_surah_text(self, surah_id):
115
- if self.full_quran:
116
- try:
117
- surah = next((s for s in self.full_quran if s['id'] == surah_id), None)
118
- if surah:
119
- if 'arabic1' in surah: # API
120
- verses = surah['arabic1']
121
- return "\n\n".join(f"آية {i + 1}: {v}" for i, v in enumerate(verses))
122
- else: # Fallback
123
- verses = surah['verses']
124
- return "\n\n".join(f"آية {v['id']}: {v['text']}" for v in verses)
125
- except Exception as e:
126
- self.logger.error(f"Error processing cached surah {surah_id}: {e}")
127
-
128
- max_retries = 3
129
- for attempt in range(max_retries):
130
- try:
131
- response = requests.get(f"{self.api_url}{surah_id}.json", timeout=10)
132
- response.raise_for_status()
133
- data = response.json()
134
- verses = data['arabic1']
135
- return "\n\n".join(f"آية {i + 1}: {v}" for i, v in enumerate(verses))
136
- except Exception as e:
137
- self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch surah {surah_id}: {e}")
138
- if attempt == max_retries - 1:
139
- return self._load_fallback_verse()
140
- time.sleep(2 ** attempt)
141
-
142
- def search_verses(self, query, top_k=5):
143
- if self.verse_embeddings is None or not self.all_verses:
144
- return self._keyword_fallback_search(query, top_k)
145
-
146
- try:
147
- # Add context for single words or short queries
148
- if len(query.split()) <= 1:
149
- query = f"معنى كلمة {query}" # Add context: "meaning of the word"
150
- query_embedding = self.model.encode(["query: " + query], convert_to_tensor=False)
151
- similarities = cosine_similarity(query_embedding, self.verse_embeddings)[0]
152
- top_indices = np.argsort(similarities)[-top_k:][::-1]
153
-
154
- results = []
155
- for idx in top_indices:
156
- verse = self.all_verses[idx]
157
- surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
158
- if surah_name.startswith("سورة "):
159
- surah_name = surah_name[len("سورة "):]
160
- results.append(
161
- f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
162
- )
163
- return "\n\n".join(results) or "لا توجد نتائج ذات صلة."
164
- except Exception as e:
165
- self.logger.error(f"Search failed: {e}")
166
- return "حدث خطأ أثناء البحث. جرب مرة أخرى."
167
-
168
- def _keyword_fallback_search(self, query, top_k=5):
169
- query_lower = query.lower()
170
- matches = []
171
- for verse in self.all_verses:
172
- if query_lower in verse['text'].lower():
173
- surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
174
- if surah_name.startswith("سورة "):
175
- surah_name = surah_name[len("سورة "):]
176
- matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
177
- return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
178
-
179
- def _load_fallback_surahs(self):
180
- return [
181
- ("الفاتحة", 1),
182
- ("البقرة", 2),
183
- ("آل عمران", 3)
184
- ]
185
-
186
- def _load_fallback_verse(self):
187
- return "بسم الله الرحمن الرحيم\nالله لا إله إلا هو الحي القيوم"
 
15
  self.all_verses = [] # List of {'surah_id': int, 'verse_num': int, 'text': str}
16
  self.verse_embeddings = None
17
  self.model = None
18
+ print("Starting QuranSearchEngine initialization...") # Debug
19
  self._load_full_quran()
20
  print(f"Surahs loaded: {len(self.surahs) if self.surahs else 0}") # Debug
21
  self._load_all_verses_and_embeddings()
22
  print(f"Verses loaded: {len(self.all_verses)}") # Debug
23
+ if not self.model:
24
+ self.logger.error("Model initialization failed, using fallback behavior")
25
 
26
  def _load_full_quran(self):
27
  max_retries = 3
 
80
  self.all_verses.append({
81
  'surah_id': surah_id,
82
  'verse_num': verse_num,