ZillionParts-PDF2Doc

Sleeping

App Files Files Community

E-slam committed on Jun 28, 2024

Commit

1ec044c

verified ·

1 Parent(s): e2b1e7f

Update main.py

Browse files

Files changed (1) hide show

main.py +10 -380

main.py CHANGED Viewed

@@ -1,387 +1,17 @@
-import asyncio
-from flet import *
-import requests
-import json
-import pandas as pd
-import elasticsearch_serverless
-import re
-import os
-import flet_fastapi
# Compiled once at import time instead of on every call.
# Matches code points U+064B-U+065F, U+0670 and U+06D6-U+06ED.
_ARABIC_DIACRITICS_RE = re.compile(r'[\u064B-\u065F\u0670\u06D6-\u06ED]')


def remove_arabic_diacritics(text: str) -> str:
    """Return *text* with Arabic diacritic code points removed.

    Every character in the ranges U+064B-U+065F, U+0670 and U+06D6-U+06ED
    is deleted; all other characters (including the tatweel U+0640) are
    left untouched.

    Args:
        text: The string to clean.

    Returns:
        A new string without the matched code points.
    """
    return _ARABIC_DIACRITICS_RE.sub('', text)
# Short vowels / tanwin / shadda / sukun (U+064B-U+0652) plus the tatweel
# (kashida, U+0640).  Written with explicit escapes so the pattern does not
# depend on invisible combining marks surviving an editor or a copy/paste.
diacritics = re.compile(r'[\u064B-\u0652\u0640]')

# Single-pass letter folding used by normalize_arabic().
_ARABIC_LETTER_FOLDING = str.maketrans({
    '\u0623': '\u0627',  # alef with hamza above -> bare alef
    '\u0625': '\u0627',  # alef with hamza below -> bare alef
    '\u0622': '\u0627',  # alef with madda       -> bare alef
    '\u0629': '\u0647',  # teh marbuta           -> heh
    '\u0649': '\u064A',  # alef maksura          -> yeh
})


def normalize_arabic(text: str) -> str:
    """Return a normalized form of *text* for matching purposes.

    The characters matched by ``diacritics`` are removed, then the alef
    variants, teh marbuta and alef maksura are folded to their base
    letters in one ``str.translate`` pass.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    return diacritics.sub('', text).translate(_ARABIC_LETTER_FOLDING)
import os

from elasticsearch_serverless import Elasticsearch

# Module-level UI state flags (p1_first_run is written by the send handler).
book_selected = False
first_run = 0
p1_first_run = 0

# The endpoint may be overridden through the environment; the default is the
# deployment this file was written against.
endpoint = os.environ.get(
    "ES_ENDPOINT",
    "https://503a98874f6241968f251209ab393a45.us-central1.gcp.cloud.es.io:443",
)

# SECURITY: the API key used to be embedded in this file.  A key committed to
# a repository must be treated as leaked, so it is now supplied through the
# ES_API_KEY environment variable (e.g. a Space secret) and never stored here.
_es_api_key = os.environ.get("ES_API_KEY")
if not _es_api_key:
    raise RuntimeError(
        "ES_API_KEY is not set; provide the Elasticsearch API key through "
        "the environment instead of hard-coding it."
    )

# Shared client used by the search coroutines.
client = Elasticsearch(
    endpoint,
    api_key=_es_api_key,
    request_timeout=60,
    max_retries=3,
    retry_on_timeout=True,
)
async def main(page: Page):
    """Build the book-search page and register its event handlers.

    The page holds a query field with a send button, a summary table, a
    result ``DataTable``, a page-text pane and a previous/next row.  All
    handlers are closures over those controls.

    Args:
        page: The flet page the controls are added to.
    """

    async def e_search(query):
        """Run a phrase search over all books and aggregate hits per book.

        Returns:
            ``(df, response_search)`` where ``df`` has the columns
            ``Book``, ``Pages``, ``Scores``, ``Count`` and ``Top Two Pages``
            (at most 10 rows, possibly zero) and ``response_search`` is the
            raw search response.
        """
        query = normalize_arabic(remove_arabic_diacritics(query))
        j_query = {
            "size": 250,
            "query": {
                "match_phrase": {
                    "Text": query
                }
            }
        }
        response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)

        # book -> {page: score}; only the first score seen for a page is kept.
        unique_books = {}
        for hit in response_search['hits']['hits']:
            source = hit['_source']
            # Keep only hits whose text literally contains the normalized query.
            if query not in source['Text']:
                continue
            unique_books.setdefault(source['Book'], {}).setdefault(source['Page'], hit['_score'])

        book_data = []
        for book, page_scores in unique_books.items():
            by_page = sorted(page_scores.items())
            # Stable sort: equal scores stay in ascending page order.
            by_score = sorted(by_page, key=lambda item: item[1], reverse=True)
            book_data.append({
                'Book': book,
                'Pages': [page_no for page_no, _ in by_page],
                'Scores': [score for _, score in by_page],
                'Count': len(by_page),
                'Top Two Pages': [page_no for page_no, _ in by_score[:2]],
            })
        # Explicit columns keep the frame usable by callers when nothing matched
        # (previously a bare ``except: pass`` left 'Top Two Pages' missing).
        df = pd.DataFrame(book_data, columns=['Book', 'Pages', 'Scores', 'Count', 'Top Two Pages'])
        return df.head(10), response_search

    inquiry_text = "من فضلك اكتب استفسارك."

    async def e_search_book(query, phrase_search=0):
        """Run a highlighted phrase search restricted to a single book.

        Args:
            query: The user's query text.
            phrase_search: The book name to filter on; ``0`` falls back to
                ``book_btn.text``.

        Returns:
            ``(df, response_search)`` where ``df`` has the columns ``Book``,
            ``Page``, ``Score`` and ``Text`` (possibly zero rows).
        """
        if phrase_search == 0:
            # NOTE(review): book_btn is not defined anywhere in the visible
            # code; every visible caller passes phrase_search explicitly.
            book_name = book_btn.text
        else:
            book_name = phrase_search
        query = normalize_arabic(remove_arabic_diacritics(query))
        j_query = {
            "size": 50,
            "query": {
                "bool": {
                    "must": [
                        {
                            "match_phrase": {
                                "Text": query
                            }
                        }
                    ],
                    "filter": [
                        {
                            "term": {
                                "Book.keyword": book_name
                            }
                        }
                    ]
                }
            },
            "highlight": {
                "fields": {
                    "Text": {}
                }
            }
        }
        response_search = await asyncio.to_thread(client.search, index="books_idx", body=j_query)
        data = [
            {
                "Book": hit['_source']['Book'],
                "Page": hit['_source']['Page'],
                "Score": hit['_score'],
                "Text": hit['_source']['Text'],
            }
            for hit in response_search['hits']['hits']
        ]
        # Explicit columns so an empty result still has the expected schema.
        df = pd.DataFrame(data, columns=["Book", "Page", "Score", "Text"])
        return df, response_search

    async def navigate_pages(e, page):
        """Debug helper: print the page argument (not wired to any control)."""
        print(page)

    async def p1_page_text_fun(e, response_search, nav="None"):
        """Show the text of one result page with the matched words emphasized.

        Args:
            e: The flet event; ``e.control.text`` is the page when a page
                button was clicked.
            response_search: The response returned by ``e_search_book``.
            nav: ``"None"`` for a page-button click, otherwise an integer
                offset (``1`` / ``-1``) relative to the page currently shown.
        """
        hits = response_search['hits']['hits']
        page_label = p1_pages_row.controls[1].controls[1]

        if nav == "None":
            page_num = e.control.text
        else:
            match = re.search(r'\d+', page_label.value)
            if match is None:
                return
            page_numbers = [int(item['_source']['Page']) for item in hits]
            try:
                target = page_numbers.index(int(match.group())) + nav
            except ValueError:
                return
            # Stay put at either end instead of raising or wrapping around.
            if not 0 <= target < len(page_numbers):
                return
            page_num = page_numbers[target]

        # Compare as strings: the button text and the stored page may differ in type.
        page_hit = next(
            (item for item in hits if str(item['_source']['Page']) == str(page_num)),
            None,
        )
        if page_hit is None:
            return

        p1_datatable_row.visible = False
        p1_page_text.visible = True
        p1_pages_row.visible = True
        page_label.value = "رقم الصفحة \n {}".format(page_num)

        txt = page_hit['_source']['Text']
        fragments = page_hit.get('highlight', {}).get('Text', [])
        phrases = {
            found
            for fragment in fragments
            for found in re.findall(r'<em>(.*?)</em>', fragment)
            if found
        }
        # One alternation over all phrases (longest first) so every phrase is
        # emphasized, not just the last one processed.
        splitter = None
        if phrases:
            alternatives = "|".join(re.escape(p) for p in sorted(phrases, key=len, reverse=True))
            splitter = re.compile("(" + alternatives + ")")

        spans = []
        for line in txt.split('\n'):
            parts = splitter.split(line) if splitter else [line]
            for index, part in enumerate(parts):
                if not part:
                    continue
                # With one capturing group, odd indices are the matched phrases.
                if index % 2:
                    spans.append(TextSpan(part, TextStyle(weight=FontWeight.BOLD, color=colors.YELLOW_600)))
                else:
                    spans.append(TextSpan(part))
            # Restore the line break once per source line.
            spans.append(TextSpan("\n"))
        p1_page_text.content.controls[0].spans = spans
        await page.update_async()

    async def p1_bookname(e):
        """Handle a click on a book button: list that book's matching pages."""
        book_name = e.control.text
        e_search_df, response = await e_search_book(p1_query_feild.value, book_name)
        hits = response['hits']['hits']
        p1_res_dt.columns.clear()
        p1_res_dt.rows.clear()
        e_search_df = e_search_df[['Text', 'Score', 'Page']]
        query = normalize_arabic(remove_arabic_diacritics(p1_query_feild.value))
        occurrences_count = sum(hit['_source']['Text'].count(query) for hit in hits)
        # Fall back to the clicked name when the search returned nothing.
        shown_book = hits[0]['_source']['Book'] if hits else book_name
        p1_info_table.controls = [create_table(shown_book,
                                               e_search_df.shape[0],
                                               occurrences_count,
                                               342)]
        translation = {"Book": "الكتاب", "Page": "الصفحه", "Score": "درجة التطابق", 'Text': "المحتوي"}
        for column_name in e_search_df.columns:
            p1_res_dt.columns.append(DataColumn(Text(translation[column_name])))
        for i in range(e_search_df.shape[0]):
            txt = e_search_df['Text'][i][:80].replace("\n", " ")
            p1_res_dt.rows.append(DataRow(cells=[
                DataCell(Row([Text(f"{txt}...", width=550)])),
                DataCell(Text(e_search_df['Score'][i], width=300)),
                DataCell(ElevatedButton(e_search_df['Page'][i],
                                        on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name)), width=120))
            ]))
        next_button = ElevatedButton(
            content=Row(
                controls=[
                    Text("  التالي"),
                    Icon(name=icons.NAVIGATE_NEXT, size=25),
                ],
                alignment=MainAxisAlignment.CENTER
            ),
            on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, 1))
        )
        previous_button = ElevatedButton(
            content=Row(
                controls=[
                    Icon(name=icons.NAVIGATE_BEFORE, size=25),
                    Text("السابق  "),
                ],
                alignment=MainAxisAlignment.CENTER
            ),
            on_click=lambda e, name=response: asyncio.create_task(p1_page_text_fun(e, name, -1))
        )
        page_num_widget = Row([Text("   "), Text("رقم الصفحة \n 50", weight=FontWeight.BOLD, text_align=TextAlign.CENTER), Text("   ")])
        p1_pages_row.controls = [previous_button, page_num_widget, next_button]
        p1_pages_row.visible = False
        await page.update_async()

    def create_table(books, pages, hits, wid):
        """Return a two-row summary table (header row + value row).

        Args:
            books: Value shown under the books header.
            pages: Value shown under the pages header.
            hits: Value shown under the matches header.
            wid: Width applied to every cell.
        """
        def create_cell(content, is_header=False):
            return Container(
                content=Text(content, weight="bold" if is_header else None),
                border=border.all(1, "cyan"),
                padding=padding.all(8),
                border_radius=2,
                alignment=alignment.center,
                width=wid
            )
        header = Row(
            controls=[
                create_cell("التطابقات", is_header=True),
                create_cell("الصفحات", is_header=True),
                create_cell("الكتب", is_header=True)
            ],
            alignment="center",
            spacing=0
        )
        values = Row(
            controls=[
                create_cell(hits),
                create_cell(pages),
                create_cell(books)
            ],
            alignment="center",
            spacing=0
        )
        return Column(
            controls=[
                header,
                values
            ],
            alignment="center",
            spacing=0
        )

    async def p1_send_button(e):
        """Handle the send button: search all books and fill the result table."""
        global p1_first_run
        p1_datatable_row.visible = True
        p1_page_text.visible = False
        p1_pages_row.visible = False
        p1_res_dt.columns.clear()
        if p1_first_run >= 1:
            p1_res_dt.rows.clear()
        p1_first_run = 1
        e_search_df, response_search = await e_search(p1_query_feild.value)
        e_search_df = e_search_df[['Top Two Pages', 'Count', 'Pages', 'Book']]
        translation = {"Book": "الكتاب", "Pages": "الصفحات", "Count": "التطابقات", 'Top Two Pages': "أعلى صفحتين متطابقتين"}
        query = normalize_arabic(remove_arabic_diacritics(p1_query_feild.value))
        # Count the query occurrences per book in one pass over the hits.
        occurrences_by_book = {}
        for hit in response_search['hits']['hits']:
            source = hit['_source']
            occurrences_by_book[source['Book']] = (
                occurrences_by_book.get(source['Book'], 0) + source['Text'].count(query)
            )
        p1_info_table.controls = [create_table(e_search_df.shape[0],
                                               e_search_df['Count'].sum(),
                                               sum(occurrences_by_book.values()),
                                               342)]
        for column_name in e_search_df.columns:
            p1_res_dt.columns.append(DataColumn(Text(translation[column_name])))
        for i in range(e_search_df.shape[0]):
            occurrences_count = occurrences_by_book.get(e_search_df['Book'][i], 0)
            p1_res_dt.rows.append(DataRow(cells=[
                DataCell(Text(e_search_df['Top Two Pages'][i], width=200)),
                DataCell(Text(occurrences_count, width=120)),
                DataCell(Text(e_search_df['Count'][i], width=180)),
                DataCell(ElevatedButton(e_search_df['Book'][i], width=450, on_click=p1_bookname)),
            ]))
        await page.update_async()

    p1_res_dt = DataTable(
        columns=[DataColumn(Text())],
        border=border.all(2, "blue"),
        border_radius=10,
        column_spacing=10,
    )
    p1_info_table = Row([Text("")], alignment=MainAxisAlignment.CENTER)
    p1_datatable_row = Column([Row([p1_res_dt], alignment=MainAxisAlignment.CENTER)], alignment=MainAxisAlignment.CENTER, scroll=ScrollMode.ALWAYS, height=398)
    p1_query_feild = TextField(label="Inquiry", hint_text=inquiry_text, expand=True, rtl=True)
    p1_query_send = FloatingActionButton(icon=icons.SEND, on_click=p1_send_button)
    p1_Query_row = Row(controls=[p1_query_feild, p1_query_send])
    p1_page_text = Container(
        content=Column([Text("", rtl=True)], scroll=ScrollMode.ALWAYS),
        margin=10,
        padding=10,
        alignment=alignment.center,
        width=1050,
        height=400,
        border_radius=10,
        border=border.all(1, colors.CYAN),
    )
    # Previous / page-number / next row; its controls are filled in by
    # p1_bookname.  It was referenced below without ever being created.
    p1_pages_row = Row(alignment=MainAxisAlignment.CENTER)
    page_1 = Column([p1_Query_row, p1_info_table, p1_datatable_row, Row([Text(), p1_page_text, Text()], alignment=MainAxisAlignment.CENTER),
                     Row([Text(), p1_pages_row, Text()], alignment=MainAxisAlignment.CENTER)])
    p1_datatable_row.visible = False
    p1_page_text.visible = False
    p1_pages_row.visible = False
    await page.add_async(page_1)


# Application object built from main().
app = flet_fastapi.app(main)

# Launcher: install the pinned flet version, fetch the application repository
# and run its entry point.  Everything runs at module level on purpose, so the
# behavior is the same whether this file is executed or imported.
import os
import subprocess
import sys
import time  # no longer used by the launcher itself; import left in place

REPO_LOCATION = "github.com/Eslam-Magdy-1297/ESearch_FletV01.git"
APP_ENTRY_POINT = os.path.join("ESearch_FletV01", "app.py")

# Install into the interpreter that is running this script rather than
# whichever "pip" happens to be first on PATH.
subprocess.call([sys.executable, "-m", "pip", "install", "flet==0.23.2"])

gh_token = os.getenv("gh_token")
if gh_token:
    url_with_token = "https://" + gh_token + "@" + REPO_LOCATION
else:
    # A missing token used to crash with a TypeError on string concatenation;
    # fall back to an anonymous clone, which still works for a public repo.
    print("gh_token is not set; attempting an unauthenticated clone", file=sys.stderr)
    url_with_token = "https://" + REPO_LOCATION

# Argument list (no shell): the token is never parsed by a shell.
# GIT_TERMINAL_PROMPT=0 makes git fail instead of waiting for credentials.
clone_status = subprocess.call(
    ["git", "clone", url_with_token],
    env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
)
if clone_status != 0:
    # Best effort, as before: a previous checkout may already be present.
    print(f"git clone exited with status {clone_status}", file=sys.stderr)

# The clone call above is synchronous, so no extra wait is needed before
# starting the application.
subprocess.call([sys.executable, APP_ENTRY_POINT])