Batnini committed on
Commit
18a4bae
·
verified ·
1 Parent(s): 3a5ebef

Update tools/quran_search.py

Browse files
Files changed (1) hide show
  1. tools/quran_search.py +42 -42
tools/quran_search.py CHANGED
@@ -1,6 +1,5 @@
1
  import requests
2
  import logging
3
- import pandas as pd
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
  from sklearn.metrics.pairwise import cosine_similarity
@@ -27,13 +26,18 @@ class QuranSearchEngine:
27
  try:
28
  response = requests.get(f"{self.api_url}surah.json", timeout=10)
29
  response.raise_for_status()
30
- self.surahs = response.json() # Array of surah metadata
31
- # Fetch full verses by iterating over surah IDs
 
 
 
32
  self.full_quran = []
33
  for surah_id in range(1, 115):
34
  surah_response = requests.get(f"{self.api_url}{surah_id}.json", timeout=10)
35
  surah_response.raise_for_status()
36
- self.full_quran.append(surah_response.json())
 
 
37
  break
38
  except Exception as e:
39
  self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch full Quran from API: {e}")
@@ -66,16 +70,25 @@ class QuranSearchEngine:
66
  return
67
 
68
  for surah in self.full_quran:
69
- surah_id = surah.get('id', surah.get('surah_id', 1)) # Handle API vs fallback structure
70
- verses = surah.get('verses', surah.get('arabic1', [])) # API uses 'arabic1', fallback uses 'verses'
71
- for verse in verses:
72
- verse_num = verse.get('id', verse.get('verse_id', 1))
73
- text = verse.get('text', verse) # Handle different key structures
74
- self.all_verses.append({
75
- 'surah_id': surah_id,
76
- 'verse_num': verse_num,
77
- 'text': text
78
- })
 
 
 
 
 
 
 
 
 
79
 
80
  try:
81
  self.model = SentenceTransformer(MODEL_NAME)
@@ -93,7 +106,7 @@ class QuranSearchEngine:
93
  def get_surahs(self):
94
  if self.surahs:
95
  return [
96
- (s['name'], s['id'])
97
  for s in self.surahs
98
  ]
99
  return self._load_fallback_surahs()
@@ -101,10 +114,14 @@ class QuranSearchEngine:
101
  def get_surah_text(self, surah_id):
102
  if self.full_quran:
103
  try:
104
- surah = next((s for s in self.full_quran if s.get('id', s.get('surah_id', 1)) == surah_id), None)
105
  if surah:
106
- verses = surah.get('verses', surah.get('arabic1', []))
107
- return "\n\n".join(f"آية {v.get('id', i + 1)}: {v.get('text', v)}" for i, v in enumerate(verses))
 
 
 
 
108
  except Exception as e:
109
  self.logger.error(f"Error processing cached surah {surah_id}: {e}")
110
 
@@ -134,7 +151,7 @@ class QuranSearchEngine:
134
  results = []
135
  for idx in top_indices:
136
  verse = self.all_verses[idx]
137
- surah_name = next((s['name'] for s in self.surahs if s.get('id', s.get('surah_id', 1)) == verse['surah_id']), f"سورة {verse['surah_id']}")
138
  results.append(
139
  f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
140
  )
@@ -148,33 +165,16 @@ class QuranSearchEngine:
148
  matches = []
149
  for verse in self.all_verses:
150
  if query_lower in verse['text'].lower():
151
- surah_name = next((s['name'] for s in self.surahs if s.get('id', s.get('surah_id', 1)) == verse['surah_id']), f"سورة {verse['surah_id']}")
152
  matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
153
  return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
154
 
155
  def _load_fallback_surahs(self):
156
- try:
157
- for source in QURAN_DATA_SOURCES:
158
- try:
159
- response = requests.get(source, timeout=10)
160
- response.raise_for_status()
161
- data = response.json()
162
- return [(s['name'], s['id']) for s in data]
163
- except Exception as e:
164
- self.logger.error(f"Failed to load fallback surahs from {source}: {e}")
165
- continue
166
- return [
167
- ("الفاتحة", 1),
168
- ("البقرة", 2),
169
- ("آل عمران", 3)
170
- ]
171
- except Exception as e:
172
- self.logger.error(f"Failed to load fallback surahs: {e}")
173
- return [
174
- ("الفاتحة", 1),
175
- ("البقرة", 2),
176
- ("آل عمران", 3)
177
- ]
178
 
179
  def _load_fallback_verse(self):
180
  return "بسم الله الرحمن الرحيم\nالله لا إله إلا هو الحي القيوم"
 
1
  import requests
2
  import logging
 
3
  import numpy as np
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
 
26
  try:
27
  response = requests.get(f"{self.api_url}surah.json", timeout=10)
28
  response.raise_for_status()
29
+ self.surahs = response.json() # Array of surah metadata (no 'id')
30
+ # Add 'id' to surahs for consistency
31
+ for i, s in enumerate(self.surahs):
32
+ s['id'] = i + 1
33
+ # Fetch full verses
34
  self.full_quran = []
35
  for surah_id in range(1, 115):
36
  surah_response = requests.get(f"{self.api_url}{surah_id}.json", timeout=10)
37
  surah_response.raise_for_status()
38
+ surah_data = surah_response.json()
39
+ surah_data['id'] = surah_id # Add id for consistency
40
+ self.full_quran.append(surah_data)
41
  break
42
  except Exception as e:
43
  self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch full Quran from API: {e}")
 
70
  return
71
 
72
  for surah in self.full_quran:
73
+ surah_id = surah.get('id', 1)
74
+ if 'arabic1' in surah: # API structure: verses as list of str
75
+ verses = surah['arabic1']
76
+ for verse_num, text in enumerate(verses, start=1):
77
+ self.all_verses.append({
78
+ 'surah_id': surah_id,
79
+ 'verse_num': verse_num,
80
+ 'text': text
81
+ })
82
+ else: # Fallback structure: verses as list of dict
83
+ verses = surah.get('verses', [])
84
+ for verse in verses:
85
+ verse_num = verse.get('id', 1)
86
+ text = verse.get('text', '')
87
+ self.all_verses.append({
88
+ 'surah_id': surah_id,
89
+ 'verse_num': verse_num,
90
+ 'text': text
91
+ })
92
 
93
  try:
94
  self.model = SentenceTransformer(MODEL_NAME)
 
106
  def get_surahs(self):
107
  if self.surahs:
108
  return [
109
+ (s.get('surahNameArabicLong', s.get('name', '')), s['id'])
110
  for s in self.surahs
111
  ]
112
  return self._load_fallback_surahs()
 
114
  def get_surah_text(self, surah_id):
115
  if self.full_quran:
116
  try:
117
+ surah = next((s for s in self.full_quran if s['id'] == surah_id), None)
118
  if surah:
119
+ if 'arabic1' in surah: # API
120
+ verses = surah['arabic1']
121
+ return "\n\n".join(f"آية {i + 1}: {v}" for i, v in enumerate(verses))
122
+ else: # Fallback
123
+ verses = surah['verses']
124
+ return "\n\n".join(f"آية {v['id']}: {v['text']}" for v in verses)
125
  except Exception as e:
126
  self.logger.error(f"Error processing cached surah {surah_id}: {e}")
127
 
 
151
  results = []
152
  for idx in top_indices:
153
  verse = self.all_verses[idx]
154
+ surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
155
  results.append(
156
  f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
157
  )
 
165
  matches = []
166
  for verse in self.all_verses:
167
  if query_lower in verse['text'].lower():
168
+ surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
169
  matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
170
  return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
171
 
172
  def _load_fallback_surahs(self):
173
+ return [
174
+ ("الفاتحة", 1),
175
+ ("البقرة", 2),
176
+ ("آل عمران", 3)
177
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  def _load_fallback_verse(self):
180
  return "بسم الله الرحمن الرحيم\nالله لا إله إلا هو الحي القيوم"