E-slam committed on
Commit
b1ed202
·
verified ·
1 Parent(s): 41c4e1e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +293 -145
main.py CHANGED
@@ -1,5 +1,5 @@
1
- import flet as ft
2
- import flet_fastapi
3
  from flet import *
4
  import requests
5
  import json
@@ -7,8 +7,7 @@ import pandas as pd
7
  import elasticsearch_serverless
8
  import re
9
  import os
10
- import pickle
11
- import asyncio
12
 
13
  def remove_arabic_diacritics(text):
14
  diacritics_pattern = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')
@@ -38,79 +37,108 @@ def normalize_arabic(text):
38
 
39
  book_selected = False
40
  first_run = 0
 
 
 
41
 
42
- client = elasticsearch_serverless.Elasticsearch(
43
- "https://e790c240926f48a78eec48ccb79ddcd1.us-central1.gcp.cloud.es.io:443",
44
- api_key="MmY2VlpZOEJtVVZGLVJjNHpzRTM6akFlVmM2bHhSVmU1M25qRTIyZy1kUQ"
 
 
 
45
  )
46
 
47
- with open('books_list.pkl', 'rb') as file:
48
- books_list = pickle.load(file)
49
 
50
- def e_search(query):
51
- query = remove_arabic_diacritics(query)
52
- query = normalize_arabic(query)
53
 
54
- j_query = {
55
- "size": 500,
56
- "query": {
57
- "match": {
58
- "Text": query
 
59
  }
60
  }
61
- }
62
-
63
- response_search = client.search(index="books_jsons_01", body=j_query)
64
- unique_books = {}
65
- for hit in response_search['hits']['hits']:
66
- book = hit['_source']['Book']
67
- page = hit['_source']['Page']
68
- score = hit['_score']
69
- if book not in unique_books:
70
- unique_books[book] = {'Pages': {page: score}, 'Count': 1}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  else:
72
- if page not in unique_books[book]['Pages']:
73
- unique_books[book]['Pages'][page] = score
74
- unique_books[book]['Count'] += 1
75
-
76
- book_data = []
77
- for book, info in unique_books.items():
78
- pages = sorted(info['Pages'].items())
79
- book_data.append({'Book': book, 'Pages': [page for page, _ in pages], 'Scores': [score for _, score in pages], 'Count': info['Count']})
80
- df = pd.DataFrame(book_data)
81
- df = df.head(12)
82
-
83
- def get_top_two(row):
84
- sorted_row = sorted(zip(row['Pages'], row['Scores']), key=lambda x: x[1], reverse=True)
85
- return [page for page, score in sorted_row[:2]]
86
-
87
- try:
88
- df['Top Two Pages'] = df.apply(get_top_two, axis=1)
89
- except:
90
- pass
91
-
92
- return df
93
-
94
- async def main(page: ft.Page):
95
- async def e_search_book(query):
96
- book_name = book_btn.text
97
 
 
98
  query = remove_arabic_diacritics(query)
99
  query = normalize_arabic(query)
100
-
101
  j_query = {
102
- "size": 10,
103
  "query": {
104
  "bool": {
105
  "must": [
106
- {"match": {"Text": query}},
107
- {"match": {"Book": book_name}}
 
 
 
 
 
 
 
 
 
 
108
  ]
109
  }
 
 
 
 
 
110
  }
111
  }
112
-
113
- response_search = client.search(index="books_jsons_01", body=j_query)
114
  data = []
115
  for hit in response_search['hits']['hits']:
116
  book = hit['_source']['Book']
@@ -125,116 +153,236 @@ async def main(page: ft.Page):
125
  })
126
 
127
  df = pd.DataFrame(data)
128
- return df
129
 
130
- async def printer(e, name):
131
- query_feild.value = name
132
- await page.update_async()
133
-
134
- async def query_feild_changed(e):
135
- datatable_row.visible = False
136
- listview.visible = True
137
-
138
- query_list = books_list
139
 
140
- list_items = {
141
- name: ListTile(
142
- title=Text(name),
143
- leading=Icon(icons.ARROW_RIGHT_SHARP),
144
- on_click=lambda e, name=name: asyncio.create_task(printer(e, name))
145
 
146
- )
147
- for name in query_list
148
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
- str_lower = normalize_arabic(e.control.value)
151
- listview.controls = [
152
- list_items.get(n) for n in query_list if str_lower in normalize_arabic(n)
153
- ] if str_lower else []
 
 
 
 
 
 
 
 
154
  await page.update_async()
155
 
156
- async def send_button(e):
157
- global first_run
158
-
159
- datatable_row.visible = True
160
- listview.visible = False
161
 
162
- if first_run >= 1:
163
- res_dt.columns.clear()
164
- res_dt.rows.clear()
 
 
 
165
 
166
- first_run = 1
 
 
167
 
168
- if not book_selected:
169
- e_search_df = e_search(query_feild.value)
 
 
 
 
170
 
171
- for i in range(len(e_search_df.columns)):
172
- res_dt.columns.append(DataColumn(Text(e_search_df.columns[i])))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- for i in range(e_search_df.shape[0]):
175
- res_dt.rows.append(DataRow(cells=[
176
- DataCell(Text(e_search_df['Book'][i], width=450)),
177
- DataCell(Text(e_search_df['Pages'][i], width=180)),
178
- DataCell(Text(e_search_df['Scores'][i], width=180)),
179
- DataCell(Text(e_search_df['Count'][i], width=120)),
180
- DataCell(Text(e_search_df['Top Two Pages'][i], width=200))
181
- ]))
 
 
182
 
183
- else:
184
- e_search_df = await e_search_book(query_feild.value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- for i in range(len(e_search_df.columns)):
187
- res_dt.columns.append(DataColumn(Text(e_search_df.columns[i])))
 
188
 
189
- for i in range(e_search_df.shape[0]):
190
- txt = e_search_df['Text'][i][:80].replace("\n", " ")
191
- res_dt.rows.append(DataRow(cells=[
192
- DataCell(Text(e_search_df['Book'][i], width=450)),
193
- DataCell(Text(e_search_df['Page'][i], width=180)),
194
- DataCell(Text(e_search_df['Score'][i], width=180)),
195
- DataCell(Row([Text(f"{txt}...", width=400), IconButton(icon=icons.ARROW_RIGHT_OUTLINED, height=50, on_click=show_book_text)]))
196
- ]))
197
 
198
- await page.update_async()
 
199
 
200
- async def book_btn_filter(e):
201
- global book_selected
202
- book_value = query_feild.value
 
 
 
203
 
204
- if book_value in books_list:
205
- book_btn.text = query_feild.value
206
- book_btn.bgcolor = colors.GREEN
207
- book_selected = True
208
- else:
209
- book_btn.text = "No Book Found"
210
- book_btn.bgcolor = colors.CYAN
211
- book_selected = False
212
 
213
  await page.update_async()
214
 
215
- def show_book_text(e):
216
- pass
217
-
218
- res_dt = DataTable(
219
  border=border.all(2, "blue"),
220
  border_radius=10,
221
  column_spacing=10,
222
  )
223
-
224
- datatable_row = Row([res_dt], alignment=MainAxisAlignment.CENTER)
225
- datatable_row.visible = False
226
-
227
- query_feild = TextField(label="Inquiry", hint_text="Please write your inquiry", expand=True,
228
- on_change=query_feild_changed)
229
-
230
- query_send = FloatingActionButton(icon=icons.SEND, on_click=send_button)
231
- book_btn = ElevatedButton(text="Book Filter", height=55, width=180, icon=icons.FILTER,
232
- on_click=book_btn_filter, bgcolor=colors.CYAN, color=colors.WHITE,
233
- style=ButtonStyle(shape=RoundedRectangleBorder(radius=10)))
234
-
235
- listview = ListView(expand=1, spacing=10, padding=20)
236
- Query_row = Row(controls=[query_feild, book_btn, query_send])
237
-
238
- await page.add_async(Query_row, listview, datatable_row)
 
 
 
 
 
 
 
 
 
239
 
240
  app = flet_fastapi.app(main)
 
1
+
2
+ import asyncio
3
  from flet import *
4
  import requests
5
  import json
 
7
  import elasticsearch_serverless
8
  import re
9
  import os
10
+ import flet_fastapi
 
11
 
12
  def remove_arabic_diacritics(text):
13
  diacritics_pattern = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')
 
37
 
38
  book_selected = False
39
  first_run = 0
40
+ p1_first_run = 0
41
+
42
from elasticsearch_serverless import Elasticsearch

# SECURITY(review): the endpoint and API key below were committed to version
# control, so the key must be treated as compromised and rotated. Deployments
# should supply ES_ENDPOINT / ES_API_KEY through the environment (for example
# as Space secrets); the committed literals remain only as a fallback so that
# existing deployments keep working until the key is rotated.
_COMMITTED_ES_ENDPOINT = "https://503a98874f6241968f251209ab393a45.us-central1.gcp.cloud.es.io:443"
_COMMITTED_ES_API_KEY = "SWZGTU5aQUJuNURpVDRSbmtZSGk6cXRSUFZDZ1lRR2k2Y3NvQW9JYjExUQ"

endpoint = os.environ.get("ES_ENDPOINT", _COMMITTED_ES_ENDPOINT)

# Shared client used by every search helper in this module.
client = Elasticsearch(
    endpoint,
    api_key=os.environ.get("ES_API_KEY", _COMMITTED_ES_API_KEY),
    request_timeout=60,
    max_retries=3,
    retry_on_timeout=True,
)
51
 
52
+ async def main(page: Page):
 
53
 
54
async def e_search(query):
    """Phrase-search every book and summarise the matching pages per book.

    The query is stripped of diacritics and normalised, then sent as a
    ``match_phrase`` query on the ``Text`` field of the ``books_idx`` index
    (at most 250 hits). Hits whose ``Text`` does not literally contain the
    normalised query are ignored when building the summary.

    Returns:
        A ``(df, response_search)`` tuple. ``df`` holds at most 10 rows, one
        per book in the order books first appear in the hits, with columns
        ``Book``, ``Pages`` (ascending), ``Scores`` (aligned with ``Pages``),
        ``Count`` (number of distinct pages) and ``Top Two Pages`` (the two
        best-scoring pages). The columns are present even when nothing
        matched. ``response_search`` is the unfiltered search response.
    """
    query = normalize_arabic(remove_arabic_diacritics(query))

    j_query = {
        "size": 250,
        "query": {
            "match_phrase": {
                "Text": query
            }
        }
    }

    # Run the client call in a worker thread so the event loop is not blocked.
    response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)

    # book -> {page: score}; the first score seen for a page is the one kept.
    pages_by_book = {}
    for hit in response_search['hits']['hits']:
        source = hit['_source']
        if query not in source['Text']:
            continue
        pages_by_book.setdefault(source['Book'], {}).setdefault(source['Page'], hit['_score'])

    book_data = []
    for book, page_scores in pages_by_book.items():
        by_page = sorted(page_scores.items())
        # Stable sort: pages with equal scores keep their ascending page order.
        by_score = sorted(by_page, key=lambda item: item[1], reverse=True)
        book_data.append({
            'Book': book,
            'Pages': [page for page, _ in by_page],
            'Scores': [score for _, score in by_page],
            'Count': len(by_page),
            'Top Two Pages': [page for page, _ in by_score[:2]],
        })

    # Naming the columns keeps the frame's shape stable for callers even when
    # there are no rows (a bare DataFrame([]) would have no columns at all).
    columns = ['Book', 'Pages', 'Scores', 'Count', 'Top Two Pages']
    df = pd.DataFrame(book_data, columns=columns).head(10)

    return df, response_search
101
+
102
+ inquiry_text = "من فضلك اكتب استفسارك."
103
+
104
+ async def e_search_book(query, phrase_search=0):
105
+ if phrase_search == 0:
106
+ book_name = book_btn.text
107
  else:
108
+ book_name = phrase_search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ url_search = 'http://localhost:9202/books_01/_search'
111
  query = remove_arabic_diacritics(query)
112
  query = normalize_arabic(query)
113
+
114
  j_query = {
115
+ "size": 50,
116
  "query": {
117
  "bool": {
118
  "must": [
119
+ {
120
+ "match_phrase": {
121
+ "Text": query
122
+ }
123
+ }
124
+ ],
125
+ "filter": [
126
+ {
127
+ "term": {
128
+ "Book.keyword": book_name
129
+ }
130
+ }
131
  ]
132
  }
133
+ },
134
+ "highlight": {
135
+ "fields": {
136
+ "Text": {}
137
+ }
138
  }
139
  }
140
+
141
+ response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
142
  data = []
143
  for hit in response_search['hits']['hits']:
144
  book = hit['_source']['Book']
 
153
  })
154
 
155
  df = pd.DataFrame(data)
156
+ return df, response_search
157
 
158
async def navigate_pages(e, page):
    """Debug stub: print the ``page`` value the handler was invoked with.

    Args:
        e: The triggering event (unused).
        page: The value to print. Note that this parameter shadows any outer
            ``page`` object for the duration of the call.
    """
    # The previous version also printed `df`, a name that is not defined in
    # this scope, so every call raised NameError.
    print(page)
 
 
 
 
 
 
161
 
162
def _split_highlights(text, phrases):
    """Split ``text`` into ``(segment, is_highlight)`` pairs.

    Every literal occurrence of any phrase becomes a highlighted segment.
    Longer phrases are tried first so a phrase that contains a shorter one is
    matched whole. Empty segments are dropped.
    """
    wanted = sorted({phrase for phrase in phrases if phrase}, key=len, reverse=True)
    if not wanted:
        return [(text, False)] if text else []
    pattern = re.compile("(" + "|".join(re.escape(phrase) for phrase in wanted) + ")")
    # With a single capturing group, re.split alternates between
    # non-matching text (even indexes) and matched text (odd indexes).
    return [
        (segment, index % 2 == 1)
        for index, segment in enumerate(pattern.split(text))
        if segment
    ]


async def p1_page_text_fun(e, response_search, nav="None"):
    """Show the full text of one hit page with the matched words emphasised.

    Args:
        e: The click event. When ``nav`` is ``"None"`` the page to show is
            read from ``e.control.text``.
        response_search: Search response whose hits carry ``_source`` (with
            ``Page`` and ``Text``) and a ``highlight`` section for ``Text``.
        nav: ``"None"`` to open the clicked page, otherwise an integer offset
            (the callers pass ``1`` / ``-1``) applied to the position of the
            currently displayed page within the hit list.

    Navigation that would move before the first or past the last hit is
    ignored, as is a request for a page that is not among the hits.
    """
    hits = response_search['hits']['hits']
    page_label = p1_pages_row.controls[1].controls[1]

    if nav == "None":
        page_num = e.control.text
    else:
        shown = re.search(r'\d+', page_label.value)
        if not shown:
            return
        page_numbers = [int(item['_source']['Page']) for item in hits]
        current = int(shown.group())
        if current not in page_numbers:
            return
        target = page_numbers.index(current) + nav
        # Stay put at either end instead of raising IndexError (past the end)
        # or silently wrapping to the last page (negative index).
        if not 0 <= target < len(page_numbers):
            return
        page_num = page_numbers[target]

    # Compare as strings: the requested page may be an int (navigation) or the
    # button caption, while the stored value may be either type.
    selected = next(
        (item for item in hits if str(item['_source']['Page']) == str(page_num)),
        None,
    )
    if selected is None:
        return

    p1_datatable_row.visible = False
    p1_page_text.visible = True
    p1_pages_row.visible = True
    page_label.value = "رقم الصفحة \n {}".format(page_num)

    # Collect the distinct words wrapped in <em>…</em> by the highlighter.
    highlight_phrases = set()
    for fragment in selected.get('highlight', {}).get('Text', []):
        highlight_phrases.update(re.findall(r'<em>(.*?)</em>', fragment))

    emphasis = TextStyle(weight=FontWeight.BOLD, color=colors.YELLOW_600)
    spans = []
    for line in selected['_source']['Text'].split('\n'):
        for segment, is_highlight in _split_highlights(line, highlight_phrases):
            if is_highlight:
                spans.append(TextSpan(segment, emphasis))
            else:
                spans.append(TextSpan(segment))
        # One line break per source line, so highlights stay inline.
        spans.append(TextSpan("\n"))

    p1_page_text.content.controls[0].spans = spans
    await page.update_async()
206
 
207
async def p1_bookname(e):
    """List the matching pages of the book whose button was clicked.

    Runs the current query restricted to the clicked book (the button
    caption), fills the results table with one row per hit (text preview,
    score and a button opening the page), refreshes the summary grid and
    prepares the hidden previous/next navigation row.

    If the search returns nothing the current view is left untouched.
    """
    book_name = e.control.text
    e_search_df, response = await e_search_book(p1_query_feild.value, book_name)
    hits = response['hits']['hits']

    # Without this guard an empty result raised KeyError on the column
    # selection / IndexError on hits[0] after the table was already cleared.
    if not hits or e_search_df.empty:
        return

    e_search_df = e_search_df[['Text', 'Score', 'Page']]
    query = normalize_arabic(remove_arabic_diacritics(p1_query_feild.value))
    occurrences_count = sum(hit['_source']['Text'].count(query) for hit in hits)

    p1_info_table.controls = [create_table(hits[0]['_source']['Book'],
                                           e_search_df.shape[0],
                                           occurrences_count,
                                           342)]

    translation = {"Book": "الكتاب", "Page": "الصفحه", "Score": "درجة التطابق", 'Text': "المحتوي"}

    p1_res_dt.columns.clear()
    p1_res_dt.rows.clear()
    for column in e_search_df.columns:
        p1_res_dt.columns.append(DataColumn(Text(translation[column])))

    for text, score, page_no in zip(e_search_df['Text'], e_search_df['Score'], e_search_df['Page']):
        txt = text[:80].replace("\n", " ")
        p1_res_dt.rows.append(DataRow(cells=[
            DataCell(Row([Text(f"{txt}...", width=550)])),
            DataCell(Text(score, width=300)),
            DataCell(ElevatedButton(page_no,
                                    on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name)),
                                    width=120))
        ]))

    next_button = ElevatedButton(
        content=Row(
            controls=[
                Text(" التالي"),
                Icon(name=icons.NAVIGATE_NEXT, size=25),
            ],
            alignment=MainAxisAlignment.CENTER
        ),
        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, 1))
    )

    previous_button = ElevatedButton(
        content=Row(
            controls=[
                Icon(name=icons.NAVIGATE_BEFORE, size=25),
                Text("السابق "),
            ],
            alignment=MainAxisAlignment.CENTER
        ),
        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, -1))
    )

    # The caption is a placeholder; p1_page_text_fun overwrites it before the
    # navigation row becomes visible.
    page_num_widget = Row([Text(" "), Text("رقم الصفحة \n 50", weight=FontWeight.BOLD, text_align=TextAlign.CENTER), Text(" ")])
    p1_pages_row.controls = [previous_button, page_num_widget, next_button]
    p1_pages_row.visible = False
    await page.update_async()
268
 
269
def create_table(books, pages, hits, wid):
    """Build a two-row summary grid: a bold header row above a value row.

    Args:
        books: Value shown in the books cell.
        pages: Value shown in the pages cell.
        hits: Value shown in the matches cell.
        wid: Width applied to every cell.

    Returns:
        A ``Column`` holding the header ``Row`` and the value ``Row``; cells
        are ordered matches, pages, books.
    """
    def bordered(value, bold=False):
        # A single bordered, centred cell of the grid.
        weight = "bold" if bold else None
        return Container(
            content=Text(value, weight=weight),
            border=border.all(1, "cyan"),
            padding=padding.all(8),
            border_radius=2,
            alignment=alignment.center,
            width=wid,
        )

    def grid_row(cells):
        # Cells sit flush against each other, centred as a group.
        return Row(controls=cells, alignment="center", spacing=0)

    titles = ("التطابقات", "الصفحات", "الكتب")
    header_row = grid_row([bordered(title, bold=True) for title in titles])
    value_row = grid_row([bordered(value) for value in (hits, pages, books)])

    return Column(controls=[header_row, value_row], alignment="center", spacing=0)
310
+
311
async def p1_send_button(e):
    """Run the query across all books and show one result row per book.

    Each row shows the two best-scoring pages, the number of literal
    occurrences of the normalised query in that book's hits, the number of
    matching pages, and a button that drills into the book. The summary grid
    above the table shows the number of books, the total number of matching
    pages and the total number of occurrences.

    When nothing matches, the table is hidden and the summary shows zeros.
    """
    global p1_first_run

    # Search before touching the table, so a failed or empty search never
    # leaves the DataTable without columns.
    e_search_df, response_search = await e_search(p1_query_feild.value)
    p1_first_run = 1

    p1_page_text.visible = False
    p1_pages_row.visible = False

    if e_search_df.empty:
        # Selecting columns from an empty frame may raise KeyError.
        p1_datatable_row.visible = False
        p1_info_table.controls = [create_table(0, 0, 0, 342)]
        await page.update_async()
        return

    p1_datatable_row.visible = True
    e_search_df = e_search_df[['Top Two Pages', 'Count', 'Pages', 'Book']]

    translation = {"Book": "الكتاب", "Pages": "الصفحات", "Count": "التطابقات", 'Top Two Pages': "أعلى صفحتين متطابقتين"}
    query = normalize_arabic(remove_arabic_diacritics(p1_query_feild.value))

    # Single pass over the hits: occurrences of the query per book.
    occurrences_by_book = {}
    for hit in response_search['hits']['hits']:
        source = hit['_source']
        occurrences_by_book[source['Book']] = (
            occurrences_by_book.get(source['Book'], 0) + source['Text'].count(query)
        )
    total_occurrences = sum(occurrences_by_book.values())

    p1_info_table.controls = [create_table(e_search_df.shape[0], e_search_df['Count'].sum(), total_occurrences, 342)]

    p1_res_dt.columns.clear()
    p1_res_dt.rows.clear()
    for column in e_search_df.columns:
        p1_res_dt.columns.append(DataColumn(Text(translation[column])))

    for top_two, page_count, book in zip(e_search_df['Top Two Pages'], e_search_df['Count'], e_search_df['Book']):
        p1_res_dt.rows.append(DataRow(cells=[
            DataCell(Text(top_two, width=200)),
            DataCell(Text(occurrences_by_book.get(book, 0), width=120)),
            DataCell(Text(page_count, width=180)),
            DataCell(ElevatedButton(book, width=450, on_click=p1_bookname)),
        ]))

    await page.update_async()
355
 
356
# Results table. It starts with one empty placeholder column; the handlers
# replace the columns and rows on every search.
p1_res_dt = DataTable(
    columns=[DataColumn(Text())],
    border=border.all(2, "blue"),
    border_radius=10,
    column_spacing=10,
)

# Summary grid shown above the results; the handlers swap in a create_table().
p1_info_table = Row([Text("")], alignment=MainAxisAlignment.CENTER)
p1_datatable_row = Column(
    [Row([p1_res_dt], alignment=MainAxisAlignment.CENTER)],
    alignment=MainAxisAlignment.CENTER,
    scroll=ScrollMode.ALWAYS,
    height=398,
)
p1_query_feild = TextField(label="Inquiry", hint_text=inquiry_text, expand=True, rtl=True)
p1_query_send = FloatingActionButton(icon=icons.SEND, on_click=p1_send_button)
p1_Query_row = Row(controls=[p1_query_feild, p1_query_send])

# Full-page text viewer; p1_page_text_fun fills the spans of the inner Text.
p1_page_text = Container(
    content=Column([Text("", rtl=True)], scroll=ScrollMode.ALWAYS),
    margin=10,
    padding=10,
    alignment=alignment.center,
    width=1050,
    height=400,
    border_radius=10,
    border=border.all(1, colors.CYAN),
)

# Previous / page-number / next row. It was referenced below and by the
# handlers but never created, which raised NameError while building page_1.
# p1_bookname populates its controls.
p1_pages_row = Row(alignment=MainAxisAlignment.CENTER)

page_1 = Column([
    p1_Query_row,
    p1_info_table,
    p1_datatable_row,
    Row([Text(), p1_page_text, Text()], alignment=MainAxisAlignment.CENTER),
    Row([Text(), p1_pages_row, Text()], alignment=MainAxisAlignment.CENTER),
])

# Only the query row and the summary placeholder are visible until a search runs.
p1_datatable_row.visible = False
p1_page_text.visible = False
p1_pages_row.visible = False
385
+
386
+ await page.add_async(page_1)
387
 
388
  app = flet_fastapi.app(main)