import asyncio
from flet import *
import requests
import json
import pandas as pd
import elasticsearch_serverless
import re
import os
import flet_fastapi
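# Strip Arabic diacritics (the harakat ranges U+064B-U+065F, U+0670, U+06D6-U+06ED) from a string.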
def remove_arabic_diacritics(text):
diacritics_pattern = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')
no_diacritics_text = re.sub(diacritics_pattern, '', text)
return no_diacritics_text
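# Verbose alternation pattern covering the common diacritics plus the tatweel/kashida filler,
# used by normalize_arabic below.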
diacritics = re.compile("""
ّ | # Tashdid
َ | # Fatha
ً | # Tanwin Fath
ُ | # Damma
ٌ | # Tanwin Damm
ِ | # Kasra
ٍ | # Tanwin Kasr
ْ | # Sukun
ـ # Tatwil/Kashida
""", re.VERBOSE)
def normalize_arabic(text):
text = diacritics.sub('', text)
text = text.replace('أ', 'ا')
text = text.replace('إ', 'ا')
text = text.replace('آ', 'ا')
text = text.replace('ة', 'ه')
text = text.replace('ى', 'ي')
return text
book_selected = False
first_run = 0
p1_first_run = 0
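# Elasticsearch Serverless client. The endpoint and API key are hard-coded below; reading them
# from environment variables (e.g. via os.environ) would keep the credentials out of the source.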
from elasticsearch_serverless import Elasticsearch
endpoint = "https://503a98874f6241968f251209ab393a45.us-central1.gcp.cloud.es.io:443"
client = Elasticsearch(
endpoint,
api_key="SWZGTU5aQUJuNURpVDRSbmtZSGk6cXRSUFZDZ1lRR2k2Y3NvQW9JYjExUQ",
request_timeout=60, max_retries=3, retry_on_timeout=True
)
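# Flet entry point: builds the search UI and wires the Elasticsearch-backed handlers to it.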
async def main(page: Page):
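    # Global phrase search across all books; returns a per-book summary DataFrame (top 10)
    # plus the raw Elasticsearch response.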
async def e_search(query):
query = remove_arabic_diacritics(query)
query = normalize_arabic(query)
j_query = {
"size": 250,
"query": {
"match_phrase": {
"Text": query
}
}
}
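        # client.search is synchronous; run it in a worker thread so the UI event loop stays responsive.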
response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
unique_books = {}
all_hits = response_search['hits']['hits']
filtered_hits = [hit for hit in all_hits if query in hit['_source']['Text']]
for hit in filtered_hits:
book = hit['_source']['Book']
page = hit['_source']['Page']
score = hit['_score']
if book not in unique_books:
unique_books[book] = {'Pages': {page: score}, 'Count': 1}
else:
if page not in unique_books[book]['Pages']:
unique_books[book]['Pages'][page] = score
unique_books[book]['Count'] += 1
book_data = []
for book, info in unique_books.items():
pages = sorted(info['Pages'].items())
book_data.append({'Book': book, 'Pages': [page for page, _ in pages], 'Scores': [score for _, score in pages], 'Count': info['Count']})
df = pd.DataFrame(book_data)
df = df.head(10)
def get_top_two(row):
sorted_row = sorted(zip(row['Pages'], row['Scores']), key=lambda x: x[1], reverse=True)
return [page for page, score in sorted_row[:2]]
        try:
            df['Top Two Pages'] = df.apply(get_top_two, axis=1)
        except Exception:
            # An empty result set has no pages to rank; skip the column in that case.
            pass
return df, response_search
inquiry_text = "من فضلك اكتب استفسارك."
async def e_search_book(query, phrase_search=0):
if phrase_search == 0:
book_name = book_btn.text
else:
book_name = phrase_search
        # url_search = 'http://localhost:9202/books_01/_search'  # unused leftover; the serverless client below is used instead
query = remove_arabic_diacritics(query)
query = normalize_arabic(query)
j_query = {
"size": 50,
"query": {
"bool": {
"must": [
{
"match_phrase": {
"Text": query
}
}
],
"filter": [
{
"term": {
"Book.keyword": book_name
}
}
]
}
},
"highlight": {
"fields": {
"Text": {}
}
}
}
response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
data = []
for hit in response_search['hits']['hits']:
book = hit['_source']['Book']
page = hit['_source']['Page']
score = hit['_score']
text = hit['_source']['Text']
data.append({
"Book": book,
"Page": page,
"Score": score,
"Text": text
})
df = pd.DataFrame(data)
return df, response_search
    async def navigate_pages(e, page):
        # Unused debug hook; it only logs the page it was invoked with.
        print(page)
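    # Show the full text of a selected page with the matched phrases in bold;
    # nav=+1/-1 steps through the result pages of the current book.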
async def p1_page_text_fun(e, response_search, nav="None"):
p1_datatable_row.visible = False
p1_page_text.visible = True
p1_pages_row.visible = True
if nav == "None":
p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(e.control.text)
page_num = e.control.text
else:
            match = re.search(r'\d+', p1_pages_row.controls[1].controls[1].value)
            if match:
                page_number = match.group()
                page_numbers = [int(item['_source']['Page']) for item in response_search['hits']['hits']]
                # Clamp navigation so the previous/next buttons cannot step outside the result list.
                page_index = page_numbers.index(int(page_number))
                page_num = page_numbers[max(0, min(len(page_numbers) - 1, page_index + nav))]
                p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(page_num)
        # Compare page numbers as strings so int/str differences between the index and the UI do not matter.
        filtered_data = [item for item in response_search['hits']['hits'] if str(item['_source']['Page']) == str(page_num)]
highlight = filtered_data[0]['highlight']['Text']
txt = filtered_data[0]['_source']['Text']
highlight_phrases = []
for item in highlight:
            # Elasticsearch wraps matched terms in <em>…</em> tags inside each highlight fragment.
            matches = re.findall(r'<em>(.*?)</em>', item)
            highlight_phrases.extend(matches)
highlight_phrases = list(set(highlight_phrases))
        # Re-insert the <em> markers into the full page text, accumulating replacements across all phrases.
        highlighted_text = txt
        for phrase in highlight_phrases:
            highlighted_text = highlighted_text.replace(phrase, f"<em>{phrase}</em>")
lines = highlighted_text.split('\n')
spans = []
for line in lines:
            # Split on the <em>…</em> markers, keeping the marked fragments in the result.
            parts = re.split(r'(<em>.*?</em>)', line)
            for part in parts:
                if part.startswith('<em>') and part.endswith('</em>'):
                    # Strip the tags (hence the [4:-5] slice) and render the match in bold yellow.
                    word = part[4:-5]
                    spans.append(TextSpan(word, TextStyle(weight=FontWeight.BOLD, color=colors.YELLOW_600)))
                else:
                    spans.append(TextSpan(part + "\n"))
p1_page_text.content.controls[0].spans = spans
await page.update_async()
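    # Book button handler: re-run the phrase search inside the chosen book and rebuild the per-page results table.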
async def p1_bookname(e):
book_name = e.control.text
e_search_df, response = await e_search_book(p1_query_feild.value, book_name)
p1_res_dt.columns.clear()
p1_res_dt.rows.clear()
e_search_df = e_search_df[['Text', 'Score', 'Page']]
occurrences_count = 0
query = remove_arabic_diacritics(p1_query_feild.value)
query = normalize_arabic(query)
for hit in response['hits']['hits']:
text = hit['_source']['Text']
occurrences_count += text.count(query)
p1_info_table.controls = [create_table(response['hits']['hits'][0]['_source']['Book'],
e_search_df.shape[0],
occurrences_count,
342)]
translation = {"Book": "الكتاب", "Page": "الصفحه", "Score": "درجة التطابق", 'Text': "المحتوي"}
for i in range(len(e_search_df.columns)):
p1_res_dt.columns.append(DataColumn(Text(translation[e_search_df.columns[i]])))
pages_btns = []
for i in range(e_search_df.shape[0]):
txt = e_search_df['Text'][i][:80].replace("\n", " ")
p1_res_dt.rows.append(DataRow(cells=[
DataCell(Row([Text(f"{txt}...", width=550)])),
DataCell(Text(e_search_df['Score'][i], width=300)),
DataCell(ElevatedButton(e_search_df['Page'][i],
on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name)), width=120))
]))
next_button = ElevatedButton(
content=Row(
controls=[
Text(" التالي"),
Icon(name=icons.NAVIGATE_NEXT, size=25),
],
alignment=MainAxisAlignment.CENTER
),
on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, 1))
)
previous_button = ElevatedButton(
content=Row(
controls=[
Icon(name=icons.NAVIGATE_BEFORE, size=25),
Text("السابق "),
],
alignment=MainAxisAlignment.CENTER
),
on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, -1))
)
page_num_widget = Row([Text(" "), Text("رقم الصفحة \n 50", weight=FontWeight.BOLD, text_align=TextAlign.CENTER), Text(" ")])
p1_pages_row.controls = [previous_button, page_num_widget, next_button]
p1_pages_row.visible = False
await page.update_async()
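    # Build a two-row summary table (matches / pages / books) out of bordered containers of width wid.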
def create_table(books, pages, hits, wid):
def create_cell(content, is_header=False):
return Container(
content=Text(content, weight="bold" if is_header else None),
border=border.all(1, "cyan"),
padding=padding.all(8),
border_radius=2,
alignment=alignment.center,
width=wid
)
header = Row(
controls=[
create_cell("التطابقات", is_header=True),
create_cell("الصفحات", is_header=True),
create_cell("الكتب", is_header=True)
],
alignment="center",
spacing=0
)
values = Row(
controls=[
create_cell(hits),
create_cell(pages),
create_cell(books)
],
alignment="center",
spacing=0
)
table = Column(
controls=[
header,
values
],
alignment="center",
spacing=0
)
return table
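    # Send-button handler: run the global search, fill the summary table, and list per-book match
    # counts with buttons that drill into each book.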
async def p1_send_button(e):
global p1_first_run
p1_datatable_row.visible = True
p1_page_text.visible = False
p1_pages_row.visible = False
p1_res_dt.columns.clear()
if p1_first_run >= 1:
p1_res_dt.rows.clear()
p1_first_run = 1
e_search_df, response_search = await e_search(p1_query_feild.value)
e_search_df = e_search_df[['Top Two Pages', 'Count', 'Pages', 'Book']]
translation = {"Book": "الكتاب", "Pages": "الصفحات", "Count": "التطابقات", 'Top Two Pages': "أعلى صفحتين متطابقتين"}
occurrences_count = 0
query = remove_arabic_diacritics(p1_query_feild.value)
query = normalize_arabic(query)
for hit in response_search['hits']['hits']:
text = hit['_source']['Text']
occurrences_count += text.count(query)
p1_info_table.controls = [create_table(e_search_df.shape[0], e_search_df['Count'].sum(), occurrences_count, 342)]
for i in range(len(e_search_df.columns)):
p1_res_dt.columns.append(DataColumn(Text(translation[e_search_df.columns[i]])))
for i in range(e_search_df.shape[0]):
occurrences_count = 0
for hit in response_search['hits']['hits']:
if hit['_source']['Book'] == e_search_df['Book'][i]:
text = hit['_source']['Text']
occurrences_count += text.count(query)
p1_res_dt.rows.append(DataRow(cells=[
DataCell(Text(e_search_df['Top Two Pages'][i], width=200)),
DataCell(Text(occurrences_count, width=120)),
DataCell(Text(e_search_df['Count'][i], width=180)),
DataCell(ElevatedButton(e_search_df['Book'][i], width=450, on_click=p1_bookname)),
]))
await page.update_async()
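    # Page 1 controls: results DataTable, summary row, scrollable table container,
    # query field with send button, and the page-text viewer.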
p1_res_dt = DataTable(
columns=[DataColumn(Text())],
border=border.all(2, "blue"),
border_radius=10,
column_spacing=10,
)
p1_info_table = Row([Text("")], alignment=MainAxisAlignment.CENTER)
p1_datatable_row = Column([Row([p1_res_dt], alignment=MainAxisAlignment.CENTER)], alignment=MainAxisAlignment.CENTER, scroll=ScrollMode.ALWAYS, height=398)
p1_query_feild = TextField(label="Inquiry", hint_text=inquiry_text, expand=True, rtl=True)
p1_query_send = FloatingActionButton(icon=icons.SEND, on_click=p1_send_button)
p1_Query_row = Row(controls=[p1_query_feild, p1_query_send])
p1_page_text = Container(
content=Column([Text("", rtl=True)], scroll=ScrollMode.ALWAYS),
margin=10,
padding=10,
alignment=alignment.center,
width=1050,
height=400,
border_radius=10,
border=border.all(1, colors.CYAN),
)
    # Row that will hold the previous/next navigation buttons; it is populated in p1_bookname.
    p1_pages_row = Row([], alignment=MainAxisAlignment.CENTER)
    page_1 = Column([p1_Query_row, p1_info_table, p1_datatable_row, Row([Text(), p1_page_text, Text()], alignment=MainAxisAlignment.CENTER),
                     Row([Text(), p1_pages_row, Text()], alignment=MainAxisAlignment.CENTER)])
p1_datatable_row.visible = False
p1_page_text.visible = False
p1_pages_row.visible = False
await page.add_async(page_1)
app = flet_fastapi.app(main)
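# A minimal sketch of serving the app locally. Assumptions not present in the original code:
# uvicorn is installed and this module is importable as `main`.
#
#     uvicorn main:app --port 8000
#
# or, equivalently, run the module directly:
if __name__ == "__main__":
    import uvicorn  # assumption: uvicorn is available in the environment
    uvicorn.run(app, host="127.0.0.1", port=8000)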