Batnini committed on
Commit
3a5ebef
·
verified ·
1 Parent(s): 158528d

Update tools/quran_search.py

Browse files
Files changed (1) hide show
  1. tools/quran_search.py +49 -26
tools/quran_search.py CHANGED
@@ -9,7 +9,7 @@ import time
9
 
10
  class QuranSearchEngine:
11
  def __init__(self):
12
- self.full_quran_url = "https://cdn.jsdelivr.net/npm/quran-json@3.1.2/dist/quran.json"
13
  self.logger = logging.getLogger(__name__)
14
  self.full_quran = None
15
  self.surahs = None
@@ -25,17 +25,38 @@ class QuranSearchEngine:
25
  max_retries = 3
26
  for attempt in range(max_retries):
27
  try:
28
- response = requests.get(self.full_quran_url, timeout=10)
29
  response.raise_for_status()
30
- self.full_quran = response.json() # Array of surah dicts
31
- self.surahs = self.full_quran
 
 
 
 
 
32
  break
33
  except Exception as e:
34
- self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch full Quran: {e}")
35
  if attempt == max_retries - 1:
36
- self.surahs = self._load_fallback_surahs()
37
  time.sleep(2 ** attempt)
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def _load_all_verses_and_embeddings(self):
40
  if not self.full_quran:
41
  self.logger.error("No full Quran loaded, skipping verse loading")
@@ -45,12 +66,15 @@ class QuranSearchEngine:
45
  return
46
 
47
  for surah in self.full_quran:
48
- surah_id = surah['id']
49
- for verse in surah['verses']:
 
 
 
50
  self.all_verses.append({
51
  'surah_id': surah_id,
52
- 'verse_num': verse['id'],
53
- 'text': verse['text']
54
  })
55
 
56
  try:
@@ -77,17 +101,17 @@ class QuranSearchEngine:
77
  def get_surah_text(self, surah_id):
78
  if self.full_quran:
79
  try:
80
- surah = self.full_quran[surah_id - 1]
81
- verses = surah['verses']
82
- return "\n\n".join(f"آية {v['id']}: {v['text']}" for v in verses)
83
- except IndexError:
84
- self.logger.error(f"Surah {surah_id} not found in cached data")
 
85
 
86
- # Fallback if cache failed
87
  max_retries = 3
88
  for attempt in range(max_retries):
89
  try:
90
- response = requests.get(f"https://quranapi.pages.dev/api/{surah_id}.json", timeout=10)
91
  response.raise_for_status()
92
  data = response.json()
93
  verses = data['arabic1']
@@ -110,7 +134,7 @@ class QuranSearchEngine:
110
  results = []
111
  for idx in top_indices:
112
  verse = self.all_verses[idx]
113
- surah_name = self.surahs[verse['surah_id'] - 1]['name']
114
  results.append(
115
  f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
116
  )
@@ -124,21 +148,20 @@ class QuranSearchEngine:
124
  matches = []
125
  for verse in self.all_verses:
126
  if query_lower in verse['text'].lower():
127
- surah_name = self.surahs[verse['surah_id'] - 1]['name'] if self.surahs else f"سورة {verse['surah_id']}"
128
  matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
129
  return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
130
 
131
- # Fallback methods (unchanged)
132
  def _load_fallback_surahs(self):
133
  try:
134
  for source in QURAN_DATA_SOURCES:
135
  try:
136
- df = pd.read_csv(source)
137
- return [
138
- (row['name_arabic'], row['surah_id'])
139
- for _, row in df.drop_duplicates(subset=['surah_id']).iterrows()
140
- ]
141
- except:
142
  continue
143
  return [
144
  ("الفاتحة", 1),
 
9
 
10
  class QuranSearchEngine:
11
  def __init__(self):
12
+ self.api_url = "https://quranapi.pages.dev/api/"
13
  self.logger = logging.getLogger(__name__)
14
  self.full_quran = None
15
  self.surahs = None
 
25
  max_retries = 3
26
  for attempt in range(max_retries):
27
  try:
28
+ response = requests.get(f"{self.api_url}surah.json", timeout=10)
29
  response.raise_for_status()
30
+ self.surahs = response.json() # Array of surah metadata
31
+ # Fetch full verses by iterating over surah IDs
32
+ self.full_quran = []
33
+ for surah_id in range(1, 115):
34
+ surah_response = requests.get(f"{self.api_url}{surah_id}.json", timeout=10)
35
+ surah_response.raise_for_status()
36
+ self.full_quran.append(surah_response.json())
37
  break
38
  except Exception as e:
39
+ self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch full Quran from API: {e}")
40
  if attempt == max_retries - 1:
41
+ self._load_fallback_quran()
42
  time.sleep(2 ** attempt)
43
 
44
+ def _load_fallback_quran(self):
45
+ max_retries = 3
46
+ for source in QURAN_DATA_SOURCES:
47
+ for attempt in range(max_retries):
48
+ try:
49
+ response = requests.get(source, timeout=10)
50
+ response.raise_for_status()
51
+ self.full_quran = response.json() # Array of surah dicts
52
+ self.surahs = self.full_quran
53
+ break
54
+ except Exception as e:
55
+ self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch fallback from {source}: {e}")
56
+ if attempt == max_retries - 1 and source == QURAN_DATA_SOURCES[-1]:
57
+ self.surahs = self._load_fallback_surahs()
58
+ time.sleep(2 ** attempt)
59
+
60
  def _load_all_verses_and_embeddings(self):
61
  if not self.full_quran:
62
  self.logger.error("No full Quran loaded, skipping verse loading")
 
66
  return
67
 
68
  for surah in self.full_quran:
69
+ surah_id = surah.get('id', surah.get('surah_id', 1)) # Handle API vs fallback structure
70
+ verses = surah.get('verses', surah.get('arabic1', [])) # API uses 'arabic1', fallback uses 'verses'
71
+ for verse in verses:
72
+ verse_num = verse.get('id', verse.get('verse_id', 1))
73
+ text = verse.get('text', verse) # Handle different key structures
74
  self.all_verses.append({
75
  'surah_id': surah_id,
76
+ 'verse_num': verse_num,
77
+ 'text': text
78
  })
79
 
80
  try:
 
101
  def get_surah_text(self, surah_id):
102
  if self.full_quran:
103
  try:
104
+ surah = next((s for s in self.full_quran if s.get('id', s.get('surah_id', 1)) == surah_id), None)
105
+ if surah:
106
+ verses = surah.get('verses', surah.get('arabic1', []))
107
+ return "\n\n".join(f"آية {v.get('id', i + 1)}: {v.get('text', v)}" for i, v in enumerate(verses))
108
+ except Exception as e:
109
+ self.logger.error(f"Error processing cached surah {surah_id}: {e}")
110
 
 
111
  max_retries = 3
112
  for attempt in range(max_retries):
113
  try:
114
+ response = requests.get(f"{self.api_url}{surah_id}.json", timeout=10)
115
  response.raise_for_status()
116
  data = response.json()
117
  verses = data['arabic1']
 
134
  results = []
135
  for idx in top_indices:
136
  verse = self.all_verses[idx]
137
+ surah_name = next((s['name'] for s in self.surahs if s.get('id', s.get('surah_id', 1)) == verse['surah_id']), f"سورة {verse['surah_id']}")
138
  results.append(
139
  f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
140
  )
 
148
  matches = []
149
  for verse in self.all_verses:
150
  if query_lower in verse['text'].lower():
151
+ surah_name = next((s['name'] for s in self.surahs if s.get('id', s.get('surah_id', 1)) == verse['surah_id']), f"سورة {verse['surah_id']}")
152
  matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
153
  return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
154
 
 
155
  def _load_fallback_surahs(self):
156
  try:
157
  for source in QURAN_DATA_SOURCES:
158
  try:
159
+ response = requests.get(source, timeout=10)
160
+ response.raise_for_status()
161
+ data = response.json()
162
+ return [(s['name'], s['id']) for s in data]
163
+ except Exception as e:
164
+ self.logger.error(f"Failed to load fallback surahs from {source}: {e}")
165
  continue
166
  return [
167
  ("الفاتحة", 1),