E-slam committed on
Commit
1ec044c
·
verified ·
1 Parent(s): e2b1e7f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +10 -380
main.py CHANGED
@@ -1,387 +1,17 @@
 
1
 
2
- import asyncio
3
- from flet import *
4
- import requests
5
- import json
6
- import pandas as pd
7
- import elasticsearch_serverless
8
- import re
9
- import os
10
- import flet_fastapi
11
-
12
- def remove_arabic_diacritics(text):
13
- diacritics_pattern = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')
14
- no_diacritics_text = re.sub(diacritics_pattern, '', text)
15
- return no_diacritics_text
16
-
17
- diacritics = re.compile("""
18
- ّ | # Tashdid
19
- َ | # Fatha
20
- ً | # Tanwin Fath
21
- ُ | # Damma
22
- ٌ | # Tanwin Damm
23
- ِ | # Kasra
24
- ٍ | # Tanwin Kasr
25
- ْ | # Sukun
26
- ـ # Tatwil/Kashida
27
- """, re.VERBOSE)
28
-
29
- def normalize_arabic(text):
30
- text = diacritics.sub('', text)
31
- text = text.replace('أ', 'ا')
32
- text = text.replace('إ', 'ا')
33
- text = text.replace('آ', 'ا')
34
- text = text.replace('ة', 'ه')
35
- text = text.replace('ى', 'ي')
36
- return text
37
-
38
- book_selected = False
39
- first_run = 0
40
- p1_first_run = 0
41
-
42
- from elasticsearch_serverless import Elasticsearch
43
-
44
- endpoint = "https://503a98874f6241968f251209ab393a45.us-central1.gcp.cloud.es.io:443"
45
-
46
- client = Elasticsearch(
47
- endpoint,
48
- api_key="SWZGTU5aQUJuNURpVDRSbmtZSGk6cXRSUFZDZ1lRR2k2Y3NvQW9JYjExUQ",
49
- request_timeout=60, max_retries=3, retry_on_timeout=True
50
- )
51
-
52
- async def main(page: Page):
53
-
54
- async def e_search(query):
55
- query = remove_arabic_diacritics(query)
56
- query = normalize_arabic(query)
57
-
58
- j_query = {
59
- "size": 250,
60
- "query": {
61
- "match_phrase": {
62
- "Text": query
63
- }
64
- }
65
- }
66
- response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
67
- unique_books = {}
68
- all_hits = response_search['hits']['hits']
69
- filtered_hits = [hit for hit in all_hits if query in hit['_source']['Text']]
70
-
71
- for hit in filtered_hits:
72
- book = hit['_source']['Book']
73
- page = hit['_source']['Page']
74
- score = hit['_score']
75
- if book not in unique_books:
76
- unique_books[book] = {'Pages': {page: score}, 'Count': 1}
77
- else:
78
- if page not in unique_books[book]['Pages']:
79
- unique_books[book]['Pages'][page] = score
80
- unique_books[book]['Count'] += 1
81
-
82
- book_data = []
83
- for book, info in unique_books.items():
84
- pages = sorted(info['Pages'].items())
85
- book_data.append({'Book': book, 'Pages': [page for page, _ in pages], 'Scores': [score for _, score in pages], 'Count': info['Count']})
86
-
87
- df = pd.DataFrame(book_data)
88
- df = df.head(10)
89
-
90
- def get_top_two(row):
91
- sorted_row = sorted(zip(row['Pages'], row['Scores']), key=lambda x: x[1], reverse=True)
92
- return [page for page, score in sorted_row[:2]]
93
-
94
- try:
95
- df['Top Two Pages'] = df.apply(get_top_two, axis=1)
96
- except:
97
- pass
98
-
99
- return df, response_search
100
-
101
- inquiry_text = "من فضلك اكتب استفسارك."
102
-
103
- async def e_search_book(query, phrase_search=0):
104
- if phrase_search == 0:
105
- book_name = book_btn.text
106
- else:
107
- book_name = phrase_search
108
-
109
- url_search = 'http://localhost:9202/books_01/_search'
110
- query = remove_arabic_diacritics(query)
111
- query = normalize_arabic(query)
112
-
113
- j_query = {
114
- "size": 50,
115
- "query": {
116
- "bool": {
117
- "must": [
118
- {
119
- "match_phrase": {
120
- "Text": query
121
- }
122
- }
123
- ],
124
- "filter": [
125
- {
126
- "term": {
127
- "Book.keyword": book_name
128
- }
129
- }
130
- ]
131
- }
132
- },
133
- "highlight": {
134
- "fields": {
135
- "Text": {}
136
- }
137
- }
138
- }
139
-
140
- response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
141
- data = []
142
- for hit in response_search['hits']['hits']:
143
- book = hit['_source']['Book']
144
- page = hit['_source']['Page']
145
- score = hit['_score']
146
- text = hit['_source']['Text']
147
- data.append({
148
- "Book": book,
149
- "Page": page,
150
- "Score": score,
151
- "Text": text
152
- })
153
-
154
- df = pd.DataFrame(data)
155
- return df, response_search
156
-
157
- async def navigate_pages(e, page):
158
- print(page)
159
- print(df)
160
-
161
- async def p1_page_text_fun(e, response_search, nav="None"):
162
- p1_datatable_row.visible = False
163
- p1_page_text.visible = True
164
- p1_pages_row.visible = True
165
-
166
- if nav == "None":
167
- p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(e.control.text)
168
- page_num = e.control.text
169
- else:
170
- match = re.search(r'\d+', p1_pages_row.controls[1].controls[1].value)
171
- if match:
172
- page_number = match.group()
173
- page_numbers = [int(item['_source']['Page']) for item in response_search['hits']['hits']]
174
- page_index = page_numbers.index(int(page_number))
175
- page_num = page_numbers[(page_index + nav)]
176
- p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(page_num)
177
-
178
- filtered_data = [item for item in response_search['hits']['hits'] if item['_source']['Page'] == page_num]
179
- highlight = filtered_data[0]['highlight']['Text']
180
- txt = filtered_data[0]['_source']['Text']
181
-
182
- highlight_phrases = []
183
- for item in highlight:
184
- matches = re.findall(r'<em>(.*?)</em>', item)
185
- highlight_phrases.extend(matches)
186
-
187
- highlight_phrases = list(set(highlight_phrases))
188
- for phrase in highlight_phrases:
189
- emphasized_phrase = f"<em>{phrase}</em>"
190
- highlighted_text = txt.replace(phrase, emphasized_phrase)
191
-
192
- lines = highlighted_text.split('\n')
193
- spans = []
194
- for line in lines:
195
- parts = re.split(r'(<em>.*?</em>)', line)
196
- for part in parts:
197
- if part.startswith('<em>') and part.endswith('</em>'):
198
- word = part[4:-5]
199
- spans.append(TextSpan(word, TextStyle(weight=FontWeight.BOLD, color=colors.YELLOW_600)))
200
- else:
201
- spans.append(TextSpan(part + "\n"))
202
-
203
- p1_page_text.content.controls[0].spans = spans
204
- await page.update_async()
205
-
206
- async def p1_bookname(e):
207
- book_name = e.control.text
208
- e_search_df, response = await e_search_book(p1_query_feild.value, book_name)
209
 
210
- p1_res_dt.columns.clear()
211
- p1_res_dt.rows.clear()
212
- e_search_df = e_search_df[['Text', 'Score', 'Page']]
213
- occurrences_count = 0
214
- query = remove_arabic_diacritics(p1_query_feild.value)
215
- query = normalize_arabic(query)
216
-
217
- for hit in response['hits']['hits']:
218
- text = hit['_source']['Text']
219
- occurrences_count += text.count(query)
220
-
221
- p1_info_table.controls = [create_table(response['hits']['hits'][0]['_source']['Book'],
222
- e_search_df.shape[0],
223
- occurrences_count,
224
- 342)]
225
-
226
- translation = {"Book": "الكتاب", "Page": "الصفحه", "Score": "درجة التطابق", 'Text': "المحتوي"}
227
-
228
- for i in range(len(e_search_df.columns)):
229
- p1_res_dt.columns.append(DataColumn(Text(translation[e_search_df.columns[i]])))
230
-
231
- pages_btns = []
232
- for i in range(e_search_df.shape[0]):
233
- txt = e_search_df['Text'][i][:80].replace("\n", " ")
234
- p1_res_dt.rows.append(DataRow(cells=[
235
- DataCell(Row([Text(f"{txt}...", width=550)])),
236
- DataCell(Text(e_search_df['Score'][i], width=300)),
237
- DataCell(ElevatedButton(e_search_df['Page'][i],
238
- on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name)), width=120))
239
- ]))
240
-
241
- next_button = ElevatedButton(
242
- content=Row(
243
- controls=[
244
- Text(" التالي"),
245
- Icon(name=icons.NAVIGATE_NEXT, size=25),
246
- ],
247
- alignment=MainAxisAlignment.CENTER
248
- ),
249
- on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, 1))
250
- )
251
-
252
- previous_button = ElevatedButton(
253
- content=Row(
254
- controls=[
255
- Icon(name=icons.NAVIGATE_BEFORE, size=25),
256
- Text("السابق "),
257
- ],
258
- alignment=MainAxisAlignment.CENTER
259
- ),
260
- on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, -1))
261
- )
262
-
263
- page_num_widget = Row([Text(" "), Text("رقم الصفحة \n 50", weight=FontWeight.BOLD, text_align=TextAlign.CENTER), Text(" ")])
264
- p1_pages_row.controls = [previous_button, page_num_widget, next_button]
265
- p1_pages_row.visible = False
266
- await page.update_async()
267
-
268
- def create_table(books, pages, hits, wid):
269
- def create_cell(content, is_header=False):
270
- return Container(
271
- content=Text(content, weight="bold" if is_header else None),
272
- border=border.all(1, "cyan"),
273
- padding=padding.all(8),
274
- border_radius=2,
275
- alignment=alignment.center,
276
- width=wid
277
- )
278
-
279
- header = Row(
280
- controls=[
281
- create_cell("التطابقات", is_header=True),
282
- create_cell("الصفحات", is_header=True),
283
- create_cell("الكتب", is_header=True)
284
- ],
285
- alignment="center",
286
- spacing=0
287
- )
288
-
289
- values = Row(
290
- controls=[
291
- create_cell(hits),
292
- create_cell(pages),
293
- create_cell(books)
294
- ],
295
- alignment="center",
296
- spacing=0
297
- )
298
-
299
- table = Column(
300
- controls=[
301
- header,
302
- values
303
- ],
304
- alignment="center",
305
- spacing=0
306
- )
307
-
308
- return table
309
-
310
- async def p1_send_button(e):
311
- global p1_first_run
312
-
313
- p1_datatable_row.visible = True
314
- p1_page_text.visible = False
315
- p1_pages_row.visible = False
316
-
317
- p1_res_dt.columns.clear()
318
- if p1_first_run >= 1:
319
- p1_res_dt.rows.clear()
320
-
321
- p1_first_run = 1
322
- e_search_df, response_search = await e_search(p1_query_feild.value)
323
- e_search_df = e_search_df[['Top Two Pages', 'Count', 'Pages', 'Book']]
324
-
325
- translation = {"Book": "الكتاب", "Pages": "الصفحات", "Count": "التطابقات", 'Top Two Pages': "أعلى صفحتين متطابقتين"}
326
- occurrences_count = 0
327
- query = remove_arabic_diacritics(p1_query_feild.value)
328
- query = normalize_arabic(query)
329
-
330
- for hit in response_search['hits']['hits']:
331
- text = hit['_source']['Text']
332
- occurrences_count += text.count(query)
333
-
334
- p1_info_table.controls = [create_table(e_search_df.shape[0], e_search_df['Count'].sum(), occurrences_count, 342)]
335
-
336
- for i in range(len(e_search_df.columns)):
337
- p1_res_dt.columns.append(DataColumn(Text(translation[e_search_df.columns[i]])))
338
-
339
- for i in range(e_search_df.shape[0]):
340
- occurrences_count = 0
341
- for hit in response_search['hits']['hits']:
342
- if hit['_source']['Book'] == e_search_df['Book'][i]:
343
- text = hit['_source']['Text']
344
- occurrences_count += text.count(query)
345
-
346
- p1_res_dt.rows.append(DataRow(cells=[
347
- DataCell(Text(e_search_df['Top Two Pages'][i], width=200)),
348
- DataCell(Text(occurrences_count, width=120)),
349
- DataCell(Text(e_search_df['Count'][i], width=180)),
350
- DataCell(ElevatedButton(e_search_df['Book'][i], width=450, on_click=p1_bookname)),
351
- ]))
352
-
353
- await page.update_async()
354
-
355
- p1_res_dt = DataTable(
356
- columns=[DataColumn(Text())],
357
- border=border.all(2, "blue"),
358
- border_radius=10,
359
- column_spacing=10,
360
- )
361
 
362
- p1_info_table = Row([Text("")], alignment=MainAxisAlignment.CENTER)
363
- p1_datatable_row = Column([Row([p1_res_dt], alignment=MainAxisAlignment.CENTER)], alignment=MainAxisAlignment.CENTER, scroll=ScrollMode.ALWAYS, height=398)
364
- p1_query_feild = TextField(label="Inquiry", hint_text=inquiry_text, expand=True, rtl=True)
365
- p1_query_send = FloatingActionButton(icon=icons.SEND, on_click=p1_send_button)
366
- p1_Query_row = Row(controls=[p1_query_feild, p1_query_send])
367
- p1_page_text = Container(
368
- content=Column([Text("", rtl=True)], scroll=ScrollMode.ALWAYS),
369
- margin=10,
370
- padding=10,
371
- alignment=alignment.center,
372
- width=1050,
373
- height=400,
374
- border_radius=10,
375
- border=border.all(1, colors.CYAN),
376
- )
377
 
378
- page_1 = Column([p1_Query_row, p1_info_table, p1_datatable_row, Row([Text(), p1_page_text, Text()], alignment=MainAxisAlignment.CENTER),
379
- Row([Text(), p1_pages_row, Text()], alignment=MainAxisAlignment.CENTER)])
380
 
381
- p1_datatable_row.visible = False
382
- p1_page_text.visible = False
383
- p1_pages_row.visible = False
384
 
385
- await page.add_async(page_1)
386
 
387
- app = flet_fastapi.app(main)
 
1
"""Bootstrap script.

Installs the pinned flet dependency, clones the application repository
using a GitHub token taken from the environment, then launches the app.
All work happens at module level as a deployment entry point.
"""

import os
import subprocess
import sys
import time

# Install the exact flet version the app was built against.
# Use `sys.executable -m pip` so the package lands in THIS interpreter's
# environment, not whatever `pip` happens to be first on PATH.
# NOTE(review): installing at runtime is fragile — prefer a requirements file.
subprocess.call([sys.executable, "-m", "pip", "install", "flet==0.23.2"])

# Fail fast with a clear message instead of a TypeError on the string
# concatenation below when the variable is unset.
gh_token = os.getenv("gh_token")
if not gh_token:
    raise SystemExit("Environment variable 'gh_token' is not set.")

# Token-authenticated HTTPS remote for the private repository.
url_with_token = "https://" + gh_token + "@github.com/Eslam-Magdy-1297/ESearch_FletV01.git"

# Argument list (shell=False): the token-bearing URL is never parsed by a
# shell, avoiding injection and accidental shell expansion.
subprocess.call(["git", "clone", url_with_token])

# Preserved from the original flow; the clone above is synchronous, so this
# pause is almost certainly unnecessary — TODO confirm and remove.
time.sleep(10)

# Launch the cloned app with the same interpreter, again without a shell.
subprocess.call([sys.executable, "ESearch_FletV01/app.py"])