Create app.py
app.py
ADDED
@@ -0,0 +1,218 @@
import gradio as gr
import sqlite3
import html
import re
import unicodedata
from typing import List, Dict

def normalize_text(text: str, language: str) -> str:
    """Normalize text based on language rules."""
    if not text:
        return text

    # Convert to lowercase and normalize Unicode
    text = text.lower()
    text = unicodedata.normalize('NFKD', text)

    if language == "Arabic":
        # Normalize Arabic alifs and remove diacritics
        text = re.sub(r'[إأآا]', 'ا', text)  # Normalize alifs
        text = re.sub(r'[ىي]', 'ي', text)  # Normalize ya
        text = re.sub(r'[ةه]', 'ه', text)  # Normalize ta marbuta and ha
        # Remove Arabic diacritics (fatha, kasra, damma, etc.)
        text = re.sub(r'[\u064B-\u065F\u0670]', '', text)

    elif language == "French":
        # Remove French diacritics by replacing accented characters with base characters
        text = ''.join(c for c in unicodedata.normalize('NFD', text)
                       if not unicodedata.combining(c))

    elif language == "Amazigh":
        # Normalize Amazigh characters
        text = text.replace('ⵕ', 'ⵔ')  # Treat ⵕ the same as ⵔ
        text = text.replace('ⵯ', '')   # Drop the labialization mark ⵯ so it is not required for matching

    return text

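# Illustrative examples of the folding above (sample values, not taken from the database):
#   normalize_text("Café", "French")  -> "cafe"   (accents stripped)
#   normalize_text("أُسْتَاذ", "Arabic") -> "استاذ"  (alif unified, diacritics removed)
#   For Amazigh, "ⵕ" folds to "ⵔ" and the labialization mark "ⵯ" is dropped.
# The same function is applied to the query and to every searched cell, so matching
# is accent- and diacritic-insensitive in all three languages.
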
def search_dictionary(search_term: str,
                      language: str,
                      exact_match: bool,
                      word_match: bool,
                      contains: bool,
                      starts_with: bool,
                      ends_with: bool) -> str:
    if not search_term or search_term.isspace():
        return "<p>Please enter a search term</p>"

    conn = sqlite3.connect('asawal_amqran.db')
    cursor = conn.cursor()

    # Normalize search term according to language-specific rules
    normalized_search = normalize_text(search_term, language)

    search_columns = {
        "Amazigh": ["word", "latin", "construct", "plural", "acc", "accneg", "inacc",
                    "variante", "feminine", "fem_construct", "fem_plural",
                    "fem_plural_construct", "exp_zgh"],
        "Arabic": ["arabic", "exp_ara", "mean_ar"],
        "French": ["french", "exp_fra"]
    }.get(language, [])

    if not search_columns:
        return "<p>Please select a language</p>"

    if not any([exact_match, word_match, contains, starts_with, ends_with]):
        return "<p>Please select at least one search option</p>"

    priority_results = []
    seen_word_ids = set()

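    # Assumed schema note: the scan below expects a `lexie` table whose columns include
    # word_id, source, category, the Amazigh fields (word, latin, construct, plural, acc,
    # accneg, inacc, variante, feminine, fem_construct, fem_plural, fem_plural_construct)
    # and the translation/expression fields (arabic, mean_ar, french, exp_zgh, exp_ara,
    # exp_fra). Column types are not assumed; every value is rendered as text.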
    # Get all data and filter in Python to handle normalization properly
    cursor.execute("SELECT * FROM lexie")
    column_names = [desc[0] for desc in cursor.description]
    word_id_idx = column_names.index('word_id') if 'word_id' in column_names else -1
    all_rows = cursor.fetchall()

    for row in all_rows:
        if word_id_idx == -1 or row[word_id_idx] in seen_word_ids:
            continue

        # Check each relevant column with normalization
        for column_idx, column_name in enumerate(column_names):
            if column_name not in search_columns:
                continue

            cell_value = row[column_idx]
            if not cell_value:
                continue

            # Normalize the cell value according to language rules
            normalized_cell = normalize_text(str(cell_value), language)

            # Priority 1: Exact Match
            if exact_match and normalized_cell == normalized_search:
                seen_word_ids.add(row[word_id_idx])
                priority_results.append((1, row))
                break

            # Priority 2: Word Match
            elif word_match and (normalized_cell == normalized_search or
                                 re.search(r'\b' + re.escape(normalized_search) + r'\b', normalized_cell)):
                seen_word_ids.add(row[word_id_idx])
                priority_results.append((2, row))
                break

            # Priority 3: Contains
            elif contains and normalized_search in normalized_cell:
                seen_word_ids.add(row[word_id_idx])
                priority_results.append((3, row))
                break

            # Priority 4: Starts With
            elif starts_with and normalized_cell.startswith(normalized_search):
                seen_word_ids.add(row[word_id_idx])
                priority_results.append((4, row))
                break

            # Priority 5: Ends With
            elif ends_with and normalized_cell.endswith(normalized_search):
                seen_word_ids.add(row[word_id_idx])
                priority_results.append((5, row))
                break

    conn.close()

    if not priority_results:
        return "<p>No results found</p>"

    # Sort by priority
    priority_results.sort(key=lambda x: x[0])
    results = [row for priority, row in priority_results]
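    # Note: list.sort() is stable, so rows that share a priority keep the order
    # in which they appear in the lexie table.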

    # Format results as HTML
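    # Every database value is passed through html.escape below, so any markup stored
    # in the data is shown literally rather than being rendered by the gr.HTML output.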
    html_output = "<div style='font-family: Arial, sans-serif;'>"
    if column_names:
        for result in results:
            result_dict = dict(zip(column_names, result))

            html_output += "<div style='border: 1px solid #ccc; margin: 10px; padding: 15px; position: relative;'>"

            if 'source' in result_dict and result_dict['source']:
                html_output += f"<div style='text-align: center; font-style: italic;'>{html.escape(str(result_dict['source']))}</div>"
            if 'category' in result_dict and result_dict['category']:
                html_output += f"<div style='position: absolute; top: 10px; right: 10px; font-weight: bold;'>{html.escape(str(result_dict['category']))}</div>"

            html_output += "<h3>Word</h3><ul>"
            for field, label in [
                ('word', 'Word'), ('latin', 'Latin'), ('construct', 'Construct'),
                ('plural', 'Plural'), ('acc', 'Accusative'), ('accneg', 'Negative Accusative'),
                ('inacc', 'Inaccusative'), ('variante', 'Variant'), ('feminine', 'Feminine'),
                ('fem_construct', 'Feminine Construct'), ('fem_plural', 'Feminine Plural'),
                ('fem_plural_construct', 'Feminine Plural Construct')
            ]:
                if field in result_dict and result_dict[field]:
                    html_output += f"<li><strong>{label}:</strong> {html.escape(str(result_dict[field]))}</li>"
            html_output += "</ul>"

            html_output += "<h3>Translations</h3><ul>"
            if 'french' in result_dict and result_dict['french']:
                html_output += f"<li><strong>French:</strong> {html.escape(str(result_dict['french']))}</li>"
            if 'arabic' in result_dict and result_dict['arabic']:
                html_output += f"<li><strong>Arabic:</strong> {html.escape(str(result_dict['arabic']))}</li>"
            if 'mean_ar' in result_dict and result_dict['mean_ar']:
                html_output += f"<li><strong>Arabic Meaning:</strong> {html.escape(str(result_dict['mean_ar']))}</li>"
            html_output += "</ul>"

            html_output += "<h3>Expressions</h3><ul>"
            for field, label in [
                ('exp_zgh', 'Amazigh Expression'), ('exp_fra', 'French Expression'),
                ('exp_ara', 'Arabic Expression')
            ]:
                if field in result_dict and result_dict[field]:
                    html_output += f"<li><strong>{label}:</strong> {html.escape(str(result_dict[field]))}</li>"
            html_output += "</ul>"

            html_output += "</div>"
    else:
        # Append so the opening wrapper <div> above stays balanced with the closing tag below
        html_output += "<p>No data found</p>"
    html_output += "</div>"
    return html_output

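# Quick manual check from a Python shell (hypothetical search term; requires
# asawal_amqran.db next to this file):
#   print(search_dictionary("azul", "Amazigh", True, True, True, False, False))
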
# Gradio interface
with gr.Blocks(title="Dictionary Search") as demo:
    gr.Markdown("# Dictionary Search")

    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(label="Search Term", placeholder="Enter search term...")
            search_button = gr.Button("Search")

            gr.Markdown("### Language Options")
            language = gr.Radio(
                choices=["Amazigh", "Arabic", "French"],
                label="Select Language",
                value="Arabic"
            )

            gr.Markdown("### Search Options")
            exact_match = gr.Checkbox(label="Exact Match (whole cell)", value=True)
            word_match = gr.Checkbox(label="Exact Word Match (within cell)", value=True)
            contains = gr.Checkbox(label="Contains", value=True)
            starts_with = gr.Checkbox(label="Starts With", value=False)
            ends_with = gr.Checkbox(label="Ends With", value=False)

        with gr.Column(scale=3):
            output = gr.HTML(label="Results")

    search_params = [search_input, language, exact_match, word_match, contains, starts_with, ends_with]
    search_input.submit(
        search_dictionary,
        inputs=search_params,
        outputs=output
    )
    search_button.click(
        search_dictionary,
        inputs=search_params,
        outputs=output
    )

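# demo.launch() starts a local Gradio server with default settings; options such as
# server_name, server_port, or share=True are available but not used here.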
demo.launch()