# Page-scrape artifact ("Spaces: Sleeping") from the hosting page header —
# kept only as a comment so the file parses; not part of the application.
import asyncio
import json
import os
import re

import pandas as pd
import requests

import elasticsearch_serverless
import flet_fastapi
from flet import *
def remove_arabic_diacritics(text):
    """Return *text* with Arabic diacritical marks (harakat etc.) removed.

    Strips the combining marks in U+064B–U+065F, the superscript alef
    U+0670, and the Quranic annotation range U+06D6–U+06ED.
    """
    marks = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')
    return marks.sub('', text)
# Pattern matching the Arabic diacritics stripped by normalize_arabic:
# shadda (tashdid), fatha, tanwin fath, damma, tanwin damm, kasra,
# tanwin kasr, sukun, and the tatwil/kashida elongation mark.
# A character class over explicit escapes replaces the original VERBOSE
# alternation — identical matches, no raw combining characters in source.
diacritics = re.compile(
    '[\u0651\u064E\u064B\u064F\u064C\u0650\u064D\u0652\u0640]'
)
def normalize_arabic(text):
    """Normalize Arabic *text* for matching.

    Drops diacritics/tatwil (module-level ``diacritics`` pattern) and unifies
    letter variants: alef-with-hamza forms -> bare alef, teh marbuta -> heh,
    alef maqsura -> yeh.
    """
    stripped = diacritics.sub('', text)
    return stripped.translate(str.maketrans({
        '\u0623': '\u0627',  # أ -> ا
        '\u0625': '\u0627',  # إ -> ا
        '\u0622': '\u0627',  # آ -> ا
        '\u0629': '\u0647',  # ة -> ه
        '\u0649': '\u064A',  # ى -> ي
    }))
from elasticsearch_serverless import Elasticsearch

# UI state flags shared across handlers.
book_selected = False
first_run = 0
p1_first_run = 0

# Elasticsearch serverless endpoint.
endpoint = "https://503a98874f6241968f251209ab393a45.us-central1.gcp.cloud.es.io:443"

# SECURITY: the API key was hard-coded here. Prefer the ES_API_KEY environment
# variable; the embedded value remains only as a backward-compatible fallback —
# rotate that key and remove the fallback.
client = Elasticsearch(
    endpoint,
    api_key=os.environ.get(
        "ES_API_KEY",
        "SWZGTU5aQUJuNURpVDRSbmtZSGk6cXRSUFZDZ1lRR2k2Y3NvQW9JYjExUQ",
    ),
    request_timeout=60, max_retries=3, retry_on_timeout=True
)
async def main(page: Page): | |
async def e_search(query):
    """Phrase-search every indexed book for *query*.

    The query is diacritic-stripped and normalized first. Returns a tuple
    ``(df, response_search)``: ``df`` has at most 10 rows with columns
    Book / Pages / Scores / Count / Top Two Pages; ``response_search`` is
    the raw Elasticsearch response.
    """
    query = normalize_arabic(remove_arabic_diacritics(query))
    j_query = {
        "size": 250,
        "query": {
            "match_phrase": {
                "Text": query
            }
        }
    }
    # client.search is blocking; run it off the event loop.
    response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
    # Keep only hits whose stored text literally contains the normalized query.
    filtered_hits = [hit for hit in response_search['hits']['hits']
                     if query in hit['_source']['Text']]
    # Aggregate per book: page -> score, plus a per-book distinct-page count.
    unique_books = {}
    for hit in filtered_hits:
        book = hit['_source']['Book']
        # Renamed from `page` — the original shadowed the enclosing flet Page.
        page_no = hit['_source']['Page']
        score = hit['_score']
        if book not in unique_books:
            unique_books[book] = {'Pages': {page_no: score}, 'Count': 1}
        elif page_no not in unique_books[book]['Pages']:
            unique_books[book]['Pages'][page_no] = score
            unique_books[book]['Count'] += 1
    book_data = []
    for book, info in unique_books.items():
        pages = sorted(info['Pages'].items())
        book_data.append({
            'Book': book,
            'Pages': [p for p, _ in pages],
            'Scores': [s for _, s in pages],
            'Count': info['Count'],
        })
    df = pd.DataFrame(book_data).head(10)

    def get_top_two(row):
        # The two page numbers with the highest scores for this book.
        ranked = sorted(zip(row['Pages'], row['Scores']), key=lambda x: x[1], reverse=True)
        return [p for p, _ in ranked[:2]]

    # FIX: the original wrapped this in a bare `except: pass`, silently hiding
    # failures; handle the empty-result case explicitly instead.
    if df.empty:
        df['Top Two Pages'] = []
    else:
        df['Top Two Pages'] = df.apply(get_top_two, axis=1)
    return df, response_search
inquiry_text = "من فضلك اكتب استفسارك."  # hint text for the query field ("Please type your inquiry.")
async def e_search_book(query, phrase_search=0):
    """Phrase-search a single book for *query*.

    If *phrase_search* is 0 (the default) the book name is read from the
    selected book button; otherwise *phrase_search* itself is the book name
    (the parameter doubles as a string for historical reasons).
    Returns ``(df, response_search)`` with df columns Book/Page/Score/Text,
    and the raw Elasticsearch response (including highlights).
    """
    if phrase_search == 0:
        # NOTE(review): `book_btn` is not defined anywhere in this file; this
        # branch would raise NameError if ever taken — confirm with callers
        # (the only visible caller always passes a book name).
        book_name = book_btn.text
    else:
        book_name = phrase_search
    # FIX: removed the unused `url_search` local (stale localhost:9202 URL);
    # the search goes through the serverless `client` instead.
    query = normalize_arabic(remove_arabic_diacritics(query))
    j_query = {
        "size": 50,
        "query": {
            "bool": {
                "must": [
                    {"match_phrase": {"Text": query}}
                ],
                "filter": [
                    {"term": {"Book.keyword": book_name}}
                ]
            }
        },
        "highlight": {
            "fields": {
                "Text": {}
            }
        }
    }
    # Blocking search executed off the event loop.
    response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
    rows = [
        {
            "Book": hit['_source']['Book'],
            "Page": hit['_source']['Page'],
            "Score": hit['_score'],
            "Text": hit['_source']['Text'],
        }
        for hit in response_search['hits']['hits']
    ]
    df = pd.DataFrame(rows)
    return df, response_search
async def navigate_pages(e, page):
    # NOTE(review): debug leftover — `df` is not defined in any enclosing scope
    # visible here, so calling this raises NameError. Confirm whether this
    # handler is still wired to anything before deleting it.
    print(page)
    print(df)
async def p1_page_text_fun(e, response_search, nav="None"):
    """Show the full text of one result page with query matches emphasized.

    *nav* is the string "None" to jump to the page named on the clicked
    button, or +1/-1 to step to the next/previous page in the hit list.
    Matches come from Elasticsearch ``<em>...</em>`` highlight markup.
    """
    p1_datatable_row.visible = False
    p1_page_text.visible = True
    p1_pages_row.visible = True
    if nav == "None":
        p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(e.control.text)
        page_num = e.control.text
    else:
        # Parse the currently displayed page number and step through the hits.
        match = re.search(r'\d+', p1_pages_row.controls[1].controls[1].value)
        if match:
            page_number = match.group()
            page_numbers = [int(item['_source']['Page']) for item in response_search['hits']['hits']]
            page_index = page_numbers.index(int(page_number))
            # NOTE(review): no bounds check — stepping past the ends wraps via
            # negative indexing or raises IndexError; confirm intended.
            page_num = page_numbers[(page_index + nav)]
            p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(page_num)
    # NOTE(review): page_num is a str on the button path and an int on the nav
    # path; the comparison below relies on '_source.Page' matching either.
    filtered_data = [item for item in response_search['hits']['hits'] if item['_source']['Page'] == page_num]
    highlight = filtered_data[0]['highlight']['Text']
    txt = filtered_data[0]['_source']['Text']
    # Collect the distinct phrases Elasticsearch wrapped in <em>...</em>.
    highlight_phrases = []
    for item in highlight:
        highlight_phrases.extend(re.findall(r'<em>(.*?)</em>', item))
    highlight_phrases = list(set(highlight_phrases))
    # FIX: accumulate replacements across phrases. The original restarted from
    # the raw text on every iteration, so only the LAST phrase stayed
    # highlighted — and with zero phrases `highlighted_text` was never bound.
    highlighted_text = txt
    for phrase in highlight_phrases:
        highlighted_text = highlighted_text.replace(phrase, f"<em>{phrase}</em>")
    # Convert the marked-up text into flet TextSpans, bolding the matches.
    spans = []
    for line in highlighted_text.split('\n'):
        for part in re.split(r'(<em>.*?</em>)', line):
            if part.startswith('<em>') and part.endswith('</em>'):
                word = part[4:-5]
                spans.append(TextSpan(word, TextStyle(weight=FontWeight.BOLD, color=colors.YELLOW_600)))
            else:
                spans.append(TextSpan(part + "\n"))
    p1_page_text.content.controls[0].spans = spans
    await page.update_async()
async def p1_bookname(e):
    # Click handler for a book button in the results table: re-run the phrase
    # search restricted to that book and rebuild the table with
    # Text / Score / Page rows plus prev/next page navigation.
    book_name = e.control.text
    e_search_df, response = await e_search_book(p1_query_feild.value, book_name)
    p1_res_dt.columns.clear()
    p1_res_dt.rows.clear()
    e_search_df = e_search_df[['Text', 'Score', 'Page']]
    # Count literal occurrences of the normalized query across all hits.
    occurrences_count = 0
    query = remove_arabic_diacritics(p1_query_feild.value)
    query = normalize_arabic(query)
    for hit in response['hits']['hits']:
        text = hit['_source']['Text']
        occurrences_count += text.count(query)
    # Summary strip: book / page count / occurrences (342 is the cell width).
    # NOTE(review): response['hits']['hits'][0] raises IndexError when the
    # search returns no hits — confirm empty results cannot reach this point.
    p1_info_table.controls = [create_table(response['hits']['hits'][0]['_source']['Book'],
                                           e_search_df.shape[0],
                                           occurrences_count,
                                           342)]
    # Arabic column headers for the per-book view.
    translation = {"Book": "الكتاب", "Page": "الصفحه", "Score": "درجة التطابق", 'Text': "المحتوي"}
    for i in range(len(e_search_df.columns)):
        p1_res_dt.columns.append(DataColumn(Text(translation[e_search_df.columns[i]])))
    pages_btns = []  # NOTE(review): never used — apparent dead local.
    for i in range(e_search_df.shape[0]):
        # Truncated one-line preview of the matching text.
        txt = e_search_df['Text'][i][:80].replace("\n", " ")
        p1_res_dt.rows.append(DataRow(cells=[
            DataCell(Row([Text(f"{txt}...", width=550)])),
            DataCell(Text(e_search_df['Score'][i], width=300)),
            DataCell(ElevatedButton(e_search_df['Page'][i],
                     on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name)), width=120))
        ]))
    # Next/previous navigation buttons for the full-page text view; the
    # response is bound as a default arg so each handler keeps its own copy.
    next_button = ElevatedButton(
        content=Row(
            controls=[
                Text(" التالي"),
                Icon(name=icons.NAVIGATE_NEXT, size=25),
            ],
            alignment=MainAxisAlignment.CENTER
        ),
        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, 1))
    )
    previous_button = ElevatedButton(
        content=Row(
            controls=[
                Icon(name=icons.NAVIGATE_BEFORE, size=25),
                Text("السابق "),
            ],
            alignment=MainAxisAlignment.CENTER
        ),
        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, -1))
    )
    # Placeholder page-number label; p1_page_text_fun rewrites the number.
    page_num_widget = Row([Text("          "), Text("رقم الصفحة \n 50", weight=FontWeight.BOLD, text_align=TextAlign.CENTER), Text("          ")])
    p1_pages_row.controls = [previous_button, page_num_widget, next_button]
    p1_pages_row.visible = False
    await page.update_async()
def create_table(books, pages, hits, wid):
    """Build a two-row summary table (header row + value row) as a flet Column.

    *books*, *pages* and *hits* fill the value row under the Arabic headers
    "الكتب" / "الصفحات" / "التطابقات"; *wid* is the fixed width of every cell.
    """
    def cell(value, header=False):
        # A single bordered, centered, fixed-width cell.
        return Container(
            content=Text(value, weight="bold" if header else None),
            border=border.all(1, "cyan"),
            padding=padding.all(8),
            border_radius=2,
            alignment=alignment.center,
            width=wid,
        )

    header_row = Row(
        controls=[
            cell("التطابقات", header=True),
            cell("الصفحات", header=True),
            cell("الكتب", header=True),
        ],
        alignment="center",
        spacing=0,
    )
    value_row = Row(
        controls=[cell(hits), cell(pages), cell(books)],
        alignment="center",
        spacing=0,
    )
    return Column(controls=[header_row, value_row], alignment="center", spacing=0)
async def p1_send_button(e):
    """Run the global phrase search and populate the summary strip and the
    per-book results table (one row per book, with a drill-down button)."""
    global p1_first_run
    p1_datatable_row.visible = True
    p1_page_text.visible = False
    p1_pages_row.visible = False
    p1_res_dt.columns.clear()
    # Rows are only cleared once a previous search has populated them.
    if p1_first_run >= 1:
        p1_res_dt.rows.clear()
    p1_first_run = 1
    e_search_df, response_search = await e_search(p1_query_feild.value)
    e_search_df = e_search_df[['Top Two Pages', 'Count', 'Pages', 'Book']]
    # Arabic column headers for the all-books view.
    translation = {"Book": "الكتاب", "Pages": "الصفحات", "Count": "التطابقات", 'Top Two Pages': "أعلى صفحتين متطابقتين"}
    query = normalize_arabic(remove_arabic_diacritics(p1_query_feild.value))
    # FIX: count query occurrences per book in ONE pass over the hits instead
    # of re-scanning every hit for every table row (was O(rows x hits)).
    occurrences_by_book = {}
    total_occurrences = 0
    for hit in response_search['hits']['hits']:
        src = hit['_source']
        n = src['Text'].count(query)
        total_occurrences += n
        occurrences_by_book[src['Book']] = occurrences_by_book.get(src['Book'], 0) + n
    # Summary strip: books / pages / occurrences (342 is the cell width).
    p1_info_table.controls = [create_table(e_search_df.shape[0], e_search_df['Count'].sum(), total_occurrences, 342)]
    for col in e_search_df.columns:
        p1_res_dt.columns.append(DataColumn(Text(translation[col])))
    for i in range(e_search_df.shape[0]):
        p1_res_dt.rows.append(DataRow(cells=[
            DataCell(Text(e_search_df['Top Two Pages'][i], width=200)),
            DataCell(Text(occurrences_by_book.get(e_search_df['Book'][i], 0), width=120)),
            DataCell(Text(e_search_df['Count'][i], width=180)),
            DataCell(ElevatedButton(e_search_df['Book'][i], width=450, on_click=p1_bookname)),
        ]))
    await page.update_async()
p1_res_dt = DataTable( | |
columns=[DataColumn(Text())], | |
border=border.all(2, "blue"), | |
border_radius=10, | |
column_spacing=10, | |
) | |
p1_info_table = Row([Text("")], alignment=MainAxisAlignment.CENTER) | |
p1_datatable_row = Column([Row([p1_res_dt], alignment=MainAxisAlignment.CENTER)], alignment=MainAxisAlignment.CENTER, scroll=ScrollMode.ALWAYS, height=398) | |
p1_query_feild = TextField(label="Inquiry", hint_text=inquiry_text, expand=True, rtl=True) | |
p1_query_send = FloatingActionButton(icon=icons.SEND, on_click=p1_send_button) | |
p1_Query_row = Row(controls=[p1_query_feild, p1_query_send]) | |
p1_page_text = Container( | |
content=Column([Text("", rtl=True)], scroll=ScrollMode.ALWAYS), | |
margin=10, | |
padding=10, | |
alignment=alignment.center, | |
width=1050, | |
height=400, | |
border_radius=10, | |
border=border.all(1, colors.CYAN), | |
) | |
page_1 = Column([p1_Query_row, p1_info_table, p1_datatable_row, Row([Text(), p1_page_text, Text()], alignment=MainAxisAlignment.CENTER), | |
Row([Text(), p1_pages_row, Text()], alignment=MainAxisAlignment.CENTER)]) | |
p1_datatable_row.visible = False | |
p1_page_text.visible = False | |
p1_pages_row.visible = False | |
await page.add_async(page_1) | |
app = flet_fastapi.app(main)  # ASGI entry point: serve the flet app through FastAPI