import asyncio
from flet import *
import requests
import json
import pandas as pd
import elasticsearch_serverless
import re
import os
import flet_fastapi


def remove_arabic_diacritics(text):
    # Strip Arabic diacritic marks (harakat) from the text.
    diacritics_pattern = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')
    no_diacritics_text = re.sub(diacritics_pattern, '', text)
    return no_diacritics_text


diacritics = re.compile("""
    ّ  | # Tashdid
    َ  | # Fatha
    ً  | # Tanwin Fath
    ُ  | # Damma
    ٌ  | # Tanwin Damm
    ِ  | # Kasra
    ٍ  | # Tanwin Kasr
    ْ  | # Sukun
    ـ    # Tatwil/Kashida
""", re.VERBOSE)


def normalize_arabic(text):
    # Remove diacritics and unify common letter variants so queries and
    # indexed text compare consistently.
    text = diacritics.sub('', text)
    text = text.replace('أ', 'ا')
    text = text.replace('إ', 'ا')
    text = text.replace('آ', 'ا')
    text = text.replace('ة', 'ه')
    text = text.replace('ى', 'ي')
    return text


book_selected = False
first_run = 0
p1_first_run = 0

from elasticsearch_serverless import Elasticsearch

endpoint = "https://503a98874f6241968f251209ab393a45.us-central1.gcp.cloud.es.io:443"

client = Elasticsearch(
    endpoint,
    api_key="SWZGTU5aQUJuNURpVDRSbmtZSGk6cXRSUFZDZ1lRR2k2Y3NvQW9JYjExUQ",
    request_timeout=60,
    max_retries=3,
    retry_on_timeout=True
)


async def main(page: Page):

    async def e_search(query):
        # Phrase-search the whole index, then aggregate the hits per book.
        query = remove_arabic_diacritics(query)
        query = normalize_arabic(query)
        j_query = {
            "size": 250,
            "query": {
                "match_phrase": {
                    "Text": query
                }
            }
        }
        response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)

        unique_books = {}
        all_hits = response_search['hits']['hits']
        filtered_hits = [hit for hit in all_hits if query in hit['_source']['Text']]
        for hit in filtered_hits:
            book = hit['_source']['Book']
            page_num = hit['_source']['Page']  # renamed from `page` to avoid shadowing the Flet page
            score = hit['_score']
            if book not in unique_books:
                unique_books[book] = {'Pages': {page_num: score}, 'Count': 1}
            else:
                if page_num not in unique_books[book]['Pages']:
                    unique_books[book]['Pages'][page_num] = score
                unique_books[book]['Count'] += 1

        book_data = []
        for book, info in unique_books.items():
            pages = sorted(info['Pages'].items())
            book_data.append({
                'Book': book,
                'Pages': [p for p, _ in pages],
                'Scores': [s for _, s in pages],
                'Count': info['Count']
            })

        df = pd.DataFrame(book_data)
        df = df.head(10)

        def get_top_two(row):
            # The two pages with the highest scores for this book.
            sorted_row = sorted(zip(row['Pages'], row['Scores']), key=lambda x: x[1], reverse=True)
            return [p for p, _ in sorted_row[:2]]

        try:
            df['Top Two Pages'] = df.apply(get_top_two, axis=1)
        except Exception:
            # df may be empty when nothing matched.
            pass
        return df, response_search

    inquiry_text = "من فضلك اكتب استفسارك."  # "Please write your inquiry."
    async def e_search_book(query, phrase_search=0):
        # Phrase-search within a single book, requesting highlights.
        if phrase_search == 0:
            # `book_btn` is expected to be defined elsewhere when no book name is passed in.
            book_name = book_btn.text
        else:
            book_name = phrase_search
        url_search = 'http://localhost:9202/books_01/_search'  # legacy local endpoint, unused
        query = remove_arabic_diacritics(query)
        query = normalize_arabic(query)
        j_query = {
            "size": 50,
            "query": {
                "bool": {
                    "must": [
                        {
                            "match_phrase": {
                                "Text": query
                            }
                        }
                    ],
                    "filter": [
                        {
                            "term": {
                                "Book.keyword": book_name
                            }
                        }
                    ]
                }
            },
            "highlight": {
                "fields": {
                    "Text": {}
                }
            }
        }
        response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)

        data = []
        for hit in response_search['hits']['hits']:
            book = hit['_source']['Book']
            page_num = hit['_source']['Page']
            score = hit['_score']
            text = hit['_source']['Text']
            data.append({
                "Book": book,
                "Page": page_num,
                "Score": score,
                "Text": text
            })
        df = pd.DataFrame(data)
        return df, response_search

    async def navigate_pages(e, page):
        # Debug helper (unused; `df` is not defined in this scope).
        print(page)
        print(df)

    async def p1_page_text_fun(e, response_search, nav="None"):
        # Show the full text of one page, with query matches highlighted.
        p1_datatable_row.visible = False
        p1_page_text.visible = True
        p1_pages_row.visible = True
        if nav == "None":
            p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(e.control.text)  # "Page number"
            page_num = e.control.text
        else:
            match = re.search(r'\d+', p1_pages_row.controls[1].controls[1].value)
            if match:
                page_number = match.group()
                page_numbers = [int(item['_source']['Page']) for item in response_search['hits']['hits']]
                page_index = page_numbers.index(int(page_number))
                page_num = page_numbers[(page_index + nav)]
                p1_pages_row.controls[1].controls[1].value = "رقم الصفحة \n {}".format(page_num)
        filtered_data = [item for item in response_search['hits']['hits'] if item['_source']['Page'] == page_num]
        highlight = filtered_data[0]['highlight']['Text']
        txt = filtered_data[0]['_source']['Text']

        # Elasticsearch wraps highlighted fragments in <em>...</em> tags.
        highlight_phrases = []
        for item in highlight:
            matches = re.findall(r'<em>(.*?)</em>', item)
            highlight_phrases.extend(matches)
        highlight_phrases = list(set(highlight_phrases))

        # Accumulate replacements instead of overwriting the text on each pass.
        highlighted_text = txt
        for phrase in highlight_phrases:
            emphasized_phrase = f"<em>{phrase}</em>"
            highlighted_text = highlighted_text.replace(phrase, emphasized_phrase)

        lines = highlighted_text.split('\n')
        spans = []
        for line in lines:
            parts = re.split(r'(<em>.*?</em>)', line)
            for part in parts:
                if part.startswith('<em>') and part.endswith('</em>'):
                    word = part[4:-5]
                    spans.append(TextSpan(word, TextStyle(weight=FontWeight.BOLD, color=colors.YELLOW_600)))
                else:
                    spans.append(TextSpan(part + "\n"))
        p1_page_text.content.controls[0].spans = spans
        await page.update_async()

    async def p1_bookname(e):
        # Drill down into a single book: list the pages that match the query.
        book_name = e.control.text
        e_search_df, response = await e_search_book(p1_query_feild.value, book_name)
        p1_res_dt.columns.clear()
        p1_res_dt.rows.clear()
        e_search_df = e_search_df[['Text', 'Score', 'Page']]
        occurrences_count = 0
        query = remove_arabic_diacritics(p1_query_feild.value)
        query = normalize_arabic(query)
        for hit in response['hits']['hits']:
            text = hit['_source']['Text']
            occurrences_count += text.count(query)
        p1_info_table.controls = [create_table(response['hits']['hits'][0]['_source']['Book'], e_search_df.shape[0], occurrences_count, 342)]
        # Column headers: Book / Page / Match score / Content.
        translation = {"Book": "الكتاب", "Page": "الصفحه", "Score": "درجة التطابق", 'Text': "المحتوي"}
        for i in range(len(e_search_df.columns)):
            p1_res_dt.columns.append(DataColumn(Text(translation[e_search_df.columns[i]])))
        pages_btns = []
        for i in range(e_search_df.shape[0]):
            txt = e_search_df['Text'][i][:80].replace("\n", " ")
            p1_res_dt.rows.append(DataRow(cells=[
                DataCell(Row([Text(f"{txt}...", width=550)])),
                DataCell(Text(e_search_df['Score'][i], width=300)),
                DataCell(ElevatedButton(e_search_df['Page'][i],
                                        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name)),
                                        width=120))
            ]))
        next_button = ElevatedButton(
            content=Row(
                controls=[
                    Text(" التالي"),  # "Next"
                    Icon(name=icons.NAVIGATE_NEXT, size=25),
                ],
                alignment=MainAxisAlignment.CENTER
            ),
            on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, 1))
        )
        previous_button = ElevatedButton(
            content=Row(
                controls=[
                    Icon(name=icons.NAVIGATE_BEFORE, size=25),
                    Text("السابق "),  # "Previous"
                ],
                alignment=MainAxisAlignment.CENTER
            ),
            on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, -1))
        )
        page_num_widget = Row([
            Text(" "),
            Text("رقم الصفحة \n 50", weight=FontWeight.BOLD, text_align=TextAlign.CENTER),  # "Page number"
            Text(" ")
        ])
        p1_pages_row.controls = [previous_button, page_num_widget, next_button]
        p1_pages_row.visible = False
        await page.update_async()

    def create_table(books, pages, hits, wid):
        # Build a simple header/value summary table: matches / pages / books.
        def create_cell(content, is_header=False):
            return Container(
                content=Text(content, weight="bold" if is_header else None),
                border=border.all(1, "cyan"),
                padding=padding.all(8),
                border_radius=2,
                alignment=alignment.center,
                width=wid
            )

        header = Row(
            controls=[
                create_cell("التطابقات", is_header=True),  # "Matches"
                create_cell("الصفحات", is_header=True),    # "Pages"
                create_cell("الكتب", is_header=True)       # "Books"
            ],
            alignment="center",
            spacing=0
        )
        values = Row(
            controls=[
                create_cell(hits),
                create_cell(pages),
                create_cell(books)
            ],
            alignment="center",
            spacing=0
        )
        table = Column(
            controls=[
                header,
                values
            ],
            alignment="center",
            spacing=0
        )
        return table

    async def p1_send_button(e):
        # Run the cross-book search and populate the results table.
        global p1_first_run
        p1_datatable_row.visible = True
        p1_page_text.visible = False
        p1_pages_row.visible = False
        p1_res_dt.columns.clear()
        if p1_first_run >= 1:
            p1_res_dt.rows.clear()
        p1_first_run = 1
        e_search_df, response_search = await e_search(p1_query_feild.value)
        e_search_df = e_search_df[['Top Two Pages', 'Count', 'Pages', 'Book']]
        # Column headers: Book / Pages / Matches / Top two matching pages.
        translation = {"Book": "الكتاب", "Pages": "الصفحات", "Count": "التطابقات", 'Top Two Pages': "أعلى صفحتين متطابقتين"}
        occurrences_count = 0
        query = remove_arabic_diacritics(p1_query_feild.value)
        query = normalize_arabic(query)
        for hit in response_search['hits']['hits']:
            text = hit['_source']['Text']
            occurrences_count += text.count(query)
        p1_info_table.controls = [create_table(e_search_df.shape[0], e_search_df['Count'].sum(), occurrences_count, 342)]
        for i in range(len(e_search_df.columns)):
            p1_res_dt.columns.append(DataColumn(Text(translation[e_search_df.columns[i]])))
        for i in range(e_search_df.shape[0]):
            # Count occurrences of the query within this book only.
            occurrences_count = 0
            for hit in response_search['hits']['hits']:
                if hit['_source']['Book'] == e_search_df['Book'][i]:
                    text = hit['_source']['Text']
                    occurrences_count += text.count(query)
            p1_res_dt.rows.append(DataRow(cells=[
                DataCell(Text(e_search_df['Top Two Pages'][i], width=200)),
                DataCell(Text(occurrences_count, width=120)),
                DataCell(Text(e_search_df['Count'][i], width=180)),
                DataCell(ElevatedButton(e_search_df['Book'][i], width=450, on_click=p1_bookname)),
            ]))
        await page.update_async()

    p1_res_dt = DataTable(
        columns=[DataColumn(Text())],
        border=border.all(2, "blue"),
        border_radius=10,
        column_spacing=10,
    )
    p1_info_table = Row([Text("")], alignment=MainAxisAlignment.CENTER)
    p1_datatable_row = Column([Row([p1_res_dt], alignment=MainAxisAlignment.CENTER)],
                              alignment=MainAxisAlignment.CENTER, scroll=ScrollMode.ALWAYS, height=398)
    p1_query_feild = TextField(label="Inquiry", hint_text=inquiry_text, expand=True, rtl=True)
    p1_query_send = FloatingActionButton(icon=icons.SEND, on_click=p1_send_button)
    p1_Query_row = Row(controls=[p1_query_feild, p1_query_send])
    # NOTE: assumed definition — p1_pages_row is referenced above and below but was
    # not defined in the original listing; an empty centered Row fits that usage.
    p1_pages_row = Row([], alignment=MainAxisAlignment.CENTER)
content=Column([Text("", rtl=True)], scroll=ScrollMode.ALWAYS), margin=10, padding=10, alignment=alignment.center, width=1050, height=400, border_radius=10, border=border.all(1, colors.CYAN), ) page_1 = Column([p1_Query_row, p1_info_table, p1_datatable_row, Row([Text(), p1_page_text, Text()], alignment=MainAxisAlignment.CENTER), Row([Text(), p1_pages_row, Text()], alignment=MainAxisAlignment.CENTER)]) p1_datatable_row.visible = False p1_page_text.visible = False p1_pages_row.visible = False await page.add_async(page_1) app = flet_fastapi.app(main)