Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,387 +1,17 @@
|
|
|
|
1 |
|
2 |
-
|
3 |
-
import asyncio
import json
import os
import re

import elasticsearch_serverless
import flet_fastapi
import pandas as pd
import requests
from flet import *
|
11 |
-
|
12 |
-
# Code points stripped by remove_arabic_diacritics: U+064B-U+065F, U+0670 and
# U+06D6-U+06ED. Compiled once at import time instead of on every call.
_ARABIC_DIACRITICS_RE = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')


def remove_arabic_diacritics(text):
    """Return *text* with Arabic diacritic marks removed.

    Every character in the ranges U+064B-U+065F, U+0670 and U+06D6-U+06ED is
    deleted; all other characters are returned unchanged.

    Args:
        text: The string to clean.

    Returns:
        A new string without the characters listed above.
    """
    return _ARABIC_DIACRITICS_RE.sub('', text)
|
16 |
-
|
17 |
-
# Marks removed before comparison, written as explicit code points:
#   U+0651 shadda (tashdid)   U+064E fatha     U+064B tanwin fath
#   U+064F damma              U+064C tanwin damm
#   U+0650 kasra              U+064D tanwin kasr
#   U+0652 sukun              U+0640 tatweel (kashida)
diacritics = re.compile(
    r'[\u0651\u064E\u064B\u064F\u064C\u0650\u064D\u0652\u0640]'
)

# Letter variants folded onto a single canonical letter.
_LETTER_FOLDS = str.maketrans({
    '\u0623': '\u0627',  # alef with hamza above -> alef
    '\u0625': '\u0627',  # alef with hamza below -> alef
    '\u0622': '\u0627',  # alef with madda -> alef
    '\u0629': '\u0647',  # teh marbuta -> heh
    '\u0649': '\u064A',  # alef maksura -> yeh
})


def normalize_arabic(text):
    """Return *text* in the normalized form used for search comparison.

    The marks matched by ``diacritics`` are removed, then the alef variants,
    teh marbuta and alef maksura are folded to alef, heh and yeh respectively.
    """
    stripped = diacritics.sub('', text)
    return stripped.translate(_LETTER_FOLDS)
|
37 |
-
|
38 |
-
# Module-level UI state flags. p1_first_run is set to 1 by p1_send_button
# after the first search.
book_selected = False
first_run = 0
p1_first_run = 0

from elasticsearch_serverless import Elasticsearch

# NOTE(review): the endpoint and API key below are committed in source. They
# are kept only as fallbacks so existing deployments keep working; set
# ES_ENDPOINT / ES_API_KEY in the environment and rotate the committed key.
endpoint = os.environ.get("ES_ENDPOINT") or "https://503a98874f6241968f251209ab393a45.us-central1.gcp.cloud.es.io:443"

# Shared search client used by e_search and e_search_book.
client = Elasticsearch(
    endpoint,
    api_key=os.environ.get("ES_API_KEY") or "SWZGTU5aQUJuNURpVDRSbmtZSGk6cXRSUFZDZ1lRR2k2Y3NvQW9JYjExUQ",
    request_timeout=60, max_retries=3, retry_on_timeout=True
)
|
51 |
-
|
52 |
-
async def main(page: Page):
|
53 |
-
|
54 |
-
async def e_search(query):
    """Search every book for *query* and summarise the matches per book.

    The query is stripped of diacritics and normalized, sent to the
    ``books_idx`` index as a ``match_phrase`` query (up to 250 hits), and
    only hits whose ``Text`` contains the normalized query verbatim are
    aggregated.

    Args:
        query: The raw text typed by the user.

    Returns:
        A ``(df, response_search)`` tuple. ``df`` has one row per book (at
        most the first 10 books, in hit order) with the columns ``Book``,
        ``Pages`` (sorted matching pages), ``Scores`` (aligned with
        ``Pages``), ``Count`` (number of distinct matching pages) and
        ``Top Two Pages`` (the two highest-scoring pages). The columns are
        present even when nothing matched. ``response_search`` is the raw
        search response.
    """
    query = normalize_arabic(remove_arabic_diacritics(query))

    j_query = {
        "size": 250,
        "query": {
            "match_phrase": {
                "Text": query
            }
        }
    }
    # client.search is blocking; run it in a worker thread.
    response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)

    # book -> {page: score of the first hit seen for that page}
    pages_by_book = {}
    for hit in response_search['hits']['hits']:
        source = hit['_source']
        if query not in source['Text']:
            continue
        pages_by_book.setdefault(source['Book'], {}).setdefault(source['Page'], hit['_score'])

    book_data = []
    for book, page_scores in pages_by_book.items():
        ordered = sorted(page_scores.items())
        # Stable sort: pages with equal scores keep their ascending page order.
        ranked = sorted(ordered, key=lambda item: item[1], reverse=True)
        book_data.append({
            'Book': book,
            'Pages': [page_no for page_no, _ in ordered],
            'Scores': [score for _, score in ordered],
            'Count': len(ordered),
            'Top Two Pages': [page_no for page_no, _ in ranked[:2]],
        })

    # Passing the columns explicitly keeps them present when nothing matched,
    # so callers can select them without a KeyError.
    df = pd.DataFrame(book_data, columns=['Book', 'Pages', 'Scores', 'Count', 'Top Two Pages'])
    df = df.head(10)

    return df, response_search
|
100 |
-
|
101 |
-
# Hint text passed to the query TextField ("Please type your inquiry." in Arabic).
inquiry_text = "من فضلك اكتب استفسارك."
|
102 |
-
|
103 |
-
async def e_search_book(query, phrase_search=0):
    """Search for *query* inside a single book.

    Args:
        query: The raw text typed by the user; it is stripped of diacritics
            and normalized before searching.
        phrase_search: Name of the book to search. When left at ``0`` the
            name is read from ``book_btn.text`` instead.

    Returns:
        A ``(df, response_search)`` tuple. ``df`` has one row per hit with
        the columns ``Book``, ``Page``, ``Score`` and ``Text`` (present even
        when there are no hits). ``response_search`` is the raw search
        response, including the ``Text`` highlight fragments.
    """
    if phrase_search == 0:
        # NOTE(review): book_btn is not defined in the visible part of the
        # file -- confirm it exists before relying on this default.
        book_name = book_btn.text
    else:
        book_name = phrase_search

    query = normalize_arabic(remove_arabic_diacritics(query))

    j_query = {
        "size": 50,
        "query": {
            "bool": {
                "must": [
                    {
                        "match_phrase": {
                            "Text": query
                        }
                    }
                ],
                "filter": [
                    {
                        "term": {
                            "Book.keyword": book_name
                        }
                    }
                ]
            }
        },
        "highlight": {
            "fields": {
                "Text": {}
            }
        }
    }

    # client.search is blocking; run it in a worker thread.
    response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)

    data = [
        {
            "Book": hit['_source']['Book'],
            "Page": hit['_source']['Page'],
            "Score": hit['_score'],
            "Text": hit['_source']['Text'],
        }
        for hit in response_search['hits']['hits']
    ]

    # Explicit columns keep the frame selectable when there are no hits.
    df = pd.DataFrame(data, columns=["Book", "Page", "Score", "Text"])
    return df, response_search
|
156 |
-
|
157 |
-
async def navigate_pages(e, page):
    """Debug stub: print the ``page`` argument and then ``df``.

    NOTE(review): ``df`` is neither a parameter nor a local of this function,
    and no definition of it is visible in this part of the file; unless an
    enclosing or global scope provides it, the second print raises NameError
    -- confirm.
    """
    print(page)
    print(df)
|
160 |
-
|
161 |
-
async def p1_page_text_fun(e, response_search, nav="None"):
    """Show the text of one result page with the matched phrases highlighted.

    Args:
        e: Click event. When ``nav`` is ``"None"`` the page to open is taken
            from ``e.control.text``.
        response_search: Search response whose ``hits.hits`` entries carry
            ``_source.Page``, ``_source.Text`` and, when available,
            ``highlight.Text``.
        nav: ``"None"`` to open the clicked page; otherwise an integer offset
            (the next/previous buttons pass ``1`` / ``-1``) applied to the
            position of the currently displayed page within the hit list.

    Nothing changes when the requested page cannot be resolved: the page
    label holds no number, the current page is not among the hits, or the
    offset would move past either end of the hit list.
    """
    def build_spans(text, phrases):
        """Split *text* into TextSpans, emphasising every occurrence of *phrases*."""
        # Longest alternatives first so a phrase is never pre-empted by a
        # shorter phrase that is one of its prefixes.
        ordered = sorted(phrases, key=len, reverse=True)
        splitter = None
        if ordered:
            splitter = re.compile("(" + "|".join(re.escape(p) for p in ordered) + ")")

        spans = []
        for line in text.split('\n'):
            pieces = splitter.split(line) if splitter else [line]
            for index, piece in enumerate(pieces):
                if not piece:
                    continue
                # With one capturing group, re.split puts matches at odd indexes.
                if index % 2:
                    spans.append(TextSpan(piece, TextStyle(weight=FontWeight.BOLD, color=colors.YELLOW_600)))
                else:
                    spans.append(TextSpan(piece))
            # One line break per source line, not one per fragment.
            spans.append(TextSpan("\n"))
        return spans

    hits = response_search['hits']['hits']
    page_label = p1_pages_row.controls[1].controls[1]

    if nav == "None":
        page_num = e.control.text
    else:
        match = re.search(r'\d+', page_label.value or "")
        if not match:
            return
        page_numbers = [int(item['_source']['Page']) for item in hits]
        try:
            target = page_numbers.index(int(match.group())) + nav
        except ValueError:
            return
        # Stop at the ends instead of wrapping around (negative index) or
        # raising IndexError.
        if not 0 <= target < len(page_numbers):
            return
        page_num = page_numbers[target]

    # Compare as strings: the button text and the indexed Page value may
    # differ in type (str vs int).
    hit = next((item for item in hits if str(item['_source']['Page']) == str(page_num)), None)
    if hit is None:
        return

    p1_datatable_row.visible = False
    p1_page_text.visible = True
    p1_pages_row.visible = True
    page_label.value = "رقم الصفحة \n {}".format(page_num)

    fragments = hit.get('highlight', {}).get('Text', [])
    phrases = {
        found
        for fragment in fragments
        for found in re.findall(r'<em>(.*?)</em>', fragment)
        if found
    }

    p1_page_text.content.controls[0].spans = build_spans(hit['_source']['Text'], phrases)
    await page.update_async()
|
205 |
-
|
206 |
-
async def p1_bookname(e):
    """List the matching pages of the book whose button was clicked.

    Runs a per-book search for the current query, fills the summary table
    (book name, matching pages, phrase occurrences), replaces the results
    table with one row per matching page, and rebuilds the previous/next
    navigation row (kept hidden until a page is opened).

    Args:
        e: Click event of a book button; ``e.control.text`` is the book name.
    """
    book_name = e.control.text
    e_search_df, response = await e_search_book(p1_query_feild.value, book_name)
    hits = response['hits']['hits']

    # reindex() yields the three columns even for an empty result, where a
    # plain column selection would raise KeyError.
    e_search_df = e_search_df.reindex(columns=['Text', 'Score', 'Page'])

    query = normalize_arabic(remove_arabic_diacritics(p1_query_feild.value))
    occurrences_count = sum(hit['_source']['Text'].count(query) for hit in hits)

    # Fall back to the clicked name when the search returned nothing.
    shown_book = hits[0]['_source']['Book'] if hits else book_name
    p1_info_table.controls = [create_table(shown_book,
                                           e_search_df.shape[0],
                                           occurrences_count,
                                           342)]

    translation = {"Book": "الكتاب", "Page": "الصفحه", "Score": "درجة التطابق", 'Text': "المحتوي"}

    # Drop the per-book listing that is currently shown; appending to it
    # would leave rows whose cell count no longer matches the columns.
    p1_res_dt.columns.clear()
    p1_res_dt.rows.clear()

    for column in e_search_df.columns:
        p1_res_dt.columns.append(DataColumn(Text(translation[column])))

    for text, score, page_no in zip(e_search_df['Text'], e_search_df['Score'], e_search_df['Page']):
        txt = text[:80].replace("\n", " ")
        p1_res_dt.rows.append(DataRow(cells=[
            DataCell(Row([Text(f"{txt}...", width=550)])),
            DataCell(Text(score, width=300)),
            DataCell(ElevatedButton(page_no,
                                    on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name)), width=120))
        ]))

    next_button = ElevatedButton(
        content=Row(
            controls=[
                Text("  التالي"),
                Icon(name=icons.NAVIGATE_NEXT, size=25),
            ],
            alignment=MainAxisAlignment.CENTER
        ),
        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, 1))
    )

    previous_button = ElevatedButton(
        content=Row(
            controls=[
                Icon(name=icons.NAVIGATE_BEFORE, size=25),
                Text("السابق  "),
            ],
            alignment=MainAxisAlignment.CENTER
        ),
        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, -1))
    )

    # The label text is a placeholder; p1_page_text_fun overwrites it when a
    # page is opened.
    page_num_widget = Row([Text("     "), Text("رقم الصفحة \n 50", weight=FontWeight.BOLD, text_align=TextAlign.CENTER), Text("     ")])
    p1_pages_row.controls = [previous_button, page_num_widget, next_button]
    p1_pages_row.visible = False
    await page.update_async()
|
267 |
-
|
268 |
-
def create_table(books, pages, hits, wid):
    """Build the two-row summary grid shown above the results table.

    The first row holds the bold headers, the second the values, in the
    order hits, pages, books. Every cell is a bordered container of width
    *wid*.

    Args:
        books: Value for the books cell.
        pages: Value for the pages cell.
        hits: Value for the matches cell.
        wid: Width applied to each cell.

    Returns:
        A Column containing the header Row followed by the values Row.
    """
    def build_row(contents, bold):
        # One bordered cell per entry, laid out without gaps.
        cells = []
        for content in contents:
            label = Text(content, weight="bold" if bold else None)
            cells.append(Container(
                content=label,
                border=border.all(1, "cyan"),
                padding=padding.all(8),
                border_radius=2,
                alignment=alignment.center,
                width=wid
            ))
        return Row(controls=cells, alignment="center", spacing=0)

    header_row = build_row(("التطابقات", "الصفحات", "الكتب"), True)
    value_row = build_row((hits, pages, books), False)

    return Column(controls=[header_row, value_row], alignment="center", spacing=0)
|
309 |
-
|
310 |
-
async def p1_send_button(e):
    """Run a search across all books and list the matching books.

    Switches the view back to the results table, clears it, searches for the
    text in the query field, fills the summary table (books, matching pages,
    phrase occurrences) and adds one row per book with a button that opens
    that book's pages.

    Args:
        e: Click event of the send button (not used).
    """
    global p1_first_run

    p1_datatable_row.visible = True
    p1_page_text.visible = False
    p1_pages_row.visible = False

    # Clearing an empty list is harmless, so the rows are always reset.
    p1_res_dt.columns.clear()
    p1_res_dt.rows.clear()
    p1_first_run = 1

    e_search_df, response_search = await e_search(p1_query_feild.value)
    # reindex() yields the four columns even for an empty result, where a
    # plain column selection would raise KeyError.
    e_search_df = e_search_df.reindex(columns=['Top Two Pages', 'Count', 'Pages', 'Book'])

    translation = {"Book": "الكتاب", "Pages": "الصفحات", "Count": "التطابقات", 'Top Two Pages': "أعلى صفحتين متطابقتين"}
    query = normalize_arabic(remove_arabic_diacritics(p1_query_feild.value))

    # Count the phrase occurrences per book in one pass over the hits.
    occurrences_by_book = {}
    for hit in response_search['hits']['hits']:
        source = hit['_source']
        occurrences_by_book[source['Book']] = (
            occurrences_by_book.get(source['Book'], 0) + source['Text'].count(query)
        )
    occurrences_count = sum(occurrences_by_book.values())

    p1_info_table.controls = [create_table(e_search_df.shape[0], e_search_df['Count'].sum(), occurrences_count, 342)]

    for column in e_search_df.columns:
        p1_res_dt.columns.append(DataColumn(Text(translation[column])))

    for top_two, page_count, book in zip(e_search_df['Top Two Pages'], e_search_df['Count'], e_search_df['Book']):
        p1_res_dt.rows.append(DataRow(cells=[
            DataCell(Text(top_two, width=200)),
            DataCell(Text(occurrences_by_book.get(book, 0), width=120)),
            DataCell(Text(page_count, width=180)),
            DataCell(ElevatedButton(book, width=450, on_click=p1_bookname)),
        ]))

    await page.update_async()
|
354 |
-
|
355 |
-
# Results table. It starts with one empty placeholder column;
# p1_send_button clears the columns and refills the table on each search.
p1_res_dt = DataTable(
    columns=[DataColumn(Text())],
    border=border.all(2, "blue"),
    border_radius=10,
    column_spacing=10,
)

# Always-scrollable, fixed-height column that centers the results table.
p1_datatable_row = Column([Row([p1_res_dt], alignment=MainAxisAlignment.CENTER)], alignment=MainAxisAlignment.CENTER, scroll=ScrollMode.ALWAYS, height=398)
# Right-to-left query input and the send button wired to p1_send_button.
p1_query_feild = TextField(label="Inquiry", hint_text=inquiry_text, expand=True, rtl=True)
p1_query_send = FloatingActionButton(icon=icons.SEND, on_click=p1_send_button)
p1_Query_row = Row(controls=[p1_query_feild, p1_query_send])
# Bordered container for the page-text view; p1_page_text_fun assigns the
# spans of the Text control held in its inner Column.
p1_page_text = Container(
    content=Column([Text("", rtl=True)], scroll=ScrollMode.ALWAYS),
    margin=10,
    padding=10,
    alignment=alignment.center,
    width=1050,
    height=400,
    border_radius=10,
    border=border.all(1, colors.CYAN),
)
|
377 |
|
378 |
-
|
379 |
-
Row([Text(), p1_pages_row, Text()], alignment=MainAxisAlignment.CENTER)])
|
380 |
|
381 |
-
|
382 |
-
|
383 |
-
p1_pages_row.visible = False
|
384 |
|
385 |
-
|
386 |
|
387 |
-
app
|
|
|
1 |
+
"""Launcher: install the pinned flet release, fetch the app repository and run it."""
import subprocess
import sys

# Install into the interpreter that is running this launcher, rather than
# whichever `pip` happens to be first on PATH.
subprocess.call([sys.executable, "-m", "pip", "install", "flet==0.23.2"])

import os
import time

REPO_DIR = "ESearch_FletV01"

gh_token = os.getenv("gh_token")
if not gh_token:
    # Without this check the URL concatenation below fails with an opaque
    # TypeError (str + None).
    raise RuntimeError(
        "The 'gh_token' environment variable is not set; "
        "cannot clone the application repository."
    )

url_with_token = "https://" + gh_token + "@github.com/Eslam-Magdy-1297/ESearch_FletV01.git"

# Argument lists keep the token out of shell parsing. The clone is skipped
# when the checkout already exists (for example after a restart), and a
# failed clone is reported but does not stop the launcher, as before.
if not os.path.isdir(REPO_DIR):
    clone_status = subprocess.call(["git", "clone", url_with_token])
    if clone_status != 0:
        print(f"git clone exited with status {clone_status}", file=sys.stderr)

# subprocess.call blocks until the clone has finished, so no sleep is needed
# before starting the app.
subprocess.call([sys.executable, os.path.join(REPO_DIR, "app.py")])
|