# abdelhaqueidali's picture
# Rename app.py to appv2.5.py
# b923572 verified
import gradio as gr
import sqlite3
import unicodedata
from typing import List, Dict
import html
import re
def normalize_text(text: str, language: str) -> str:
    """Fold *text* into a comparison-friendly form for the given language.

    Rules applied per language:

    * ``"Arabic"``: the alef variants ``أ``, ``إ`` and ``آ`` become a bare
      ``ا``, then every combining mark left after NFKD decomposition is removed.
    * ``"French"``: every combining mark left after NFKD decomposition is
      removed (accents are stripped).
    * ``"Amazigh"``: ``ⵕ`` becomes ``ⵔ`` and ``ⵯ`` is deleted.
    * Any other value: no language-specific folding.

    The result is lower-cased in every case. Falsy input (``None`` or ``""``)
    is returned unchanged.
    """
    if not text:
        return text

    def without_marks(value: str) -> str:
        # Unicode general categories Mn/Mc/Me all start with "M".
        decomposed = unicodedata.normalize('NFKD', value)
        return ''.join(
            ch for ch in decomposed if unicodedata.category(ch)[0] != 'M'
        )

    if language == "Arabic":
        alef_variants = str.maketrans({'أ': 'ا', 'إ': 'ا', 'آ': 'ا'})
        folded = without_marks(text.translate(alef_variants))
    elif language == "French":
        folded = without_marks(text)
    elif language == "Amazigh":
        folded = text.translate(str.maketrans({'ⵕ': 'ⵔ', 'ⵯ': None}))
    else:
        folded = text

    return folded.lower()
# Path of the SQLite dictionary database, relative to the working directory.
_DB_PATH = 'asawal_amqran.db'

# Columns of the `lexie` table that are searched for each interface language.
_SEARCH_COLUMNS = {
    "Amazigh": ["word", "latin", "construct", "plural", "acc", "accneg", "inacc",
                "variante", "feminine", "fem_construct", "fem_plural",
                "fem_plural_construct", "exp_zgh"],
    "Arabic": ["arabic", "exp_ara", "mean_ar"],
    "French": ["french", "exp_fra"],
}

# (column, label) pairs rendered under each heading of a result card.
_WORD_FIELDS = [
    ('word', 'Word'), ('latin', 'Latin'), ('construct', 'Construct'),
    ('plural', 'Plural'), ('acc', 'Accusative'), ('accneg', 'Negative Accusative'),
    ('inacc', 'Inaccusative'), ('variante', 'Variant'), ('feminine', 'Feminine'),
    ('fem_construct', 'Feminine Construct'), ('fem_plural', 'Feminine Plural'),
    ('fem_plural_construct', 'Feminine Plural Construct'),
]
_TRANSLATION_FIELDS = [
    ('french', 'French'), ('arabic', 'Arabic'), ('mean_ar', 'Arabic Meaning'),
]
_EXPRESSION_FIELDS = [
    ('exp_zgh', 'Amazigh Expression'), ('exp_fra', 'French Expression'),
    ('exp_ara', 'Arabic Expression'),
]


def _escape_like(term: str) -> str:
    """Escape LIKE wildcards so *term* matches literally under ``ESCAPE '\\'``."""
    return term.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')


def _build_searches(term: str,
                    columns: List[str],
                    exact_match: bool,
                    word_match: bool,
                    contains: bool,
                    starts_with: bool,
                    ends_with: bool) -> list:
    """Return ``(query, params)`` pairs for the enabled modes, best match first."""
    like = "norm_text({col}) LIKE ? ESCAPE '\\'"
    equals = "norm_text({col}) = ?"
    escaped = _escape_like(term)

    # Each plan is (condition templates for one column, parameters for one column).
    plans = []
    if exact_match:
        plans.append(([equals], [term]))
    if word_match:
        # The term is the whole cell, or a space-delimited word at the start,
        # at the end, or in the middle of the cell.
        plans.append((
            [equals, like, like, like],
            [term, f"{escaped} %", f"% {escaped}", f"% {escaped} %"],
        ))
    if contains:
        plans.append(([like], [f"%{escaped}%"]))
    # "Contains" is a superset of the two modes below, so they only contribute
    # new rows when "Contains" is switched off.
    if starts_with:
        plans.append(([like], [f"{escaped}%"]))
    if ends_with:
        plans.append(([like], [f"%{escaped}"]))

    searches = []
    for templates, values in plans:
        conditions = [t.format(col=col) for col in columns for t in templates]
        query = f"SELECT * FROM lexie WHERE {' OR '.join(conditions)}"
        searches.append((query, values * len(columns)))
    return searches


def _render_items(entry: Dict[str, object], fields: list) -> str:
    """Render the non-empty *fields* of *entry* as ``<li>`` elements."""
    return ''.join(
        f"<li><strong>{label}:</strong> {html.escape(str(entry[field]))}</li>"
        for field, label in fields
        if entry.get(field)
    )


def _render_entry(entry: Dict[str, object]) -> str:
    """Render one dictionary row as an HTML card."""
    parts = ["<div style='border: 1px solid #ccc; margin: 10px; padding: 15px; position: relative;'>"]
    if entry.get('source'):
        parts.append(
            f"<div style='text-align: center; font-style: italic;'>{html.escape(str(entry['source']))}</div>"
        )
    if entry.get('category'):
        parts.append(
            f"<div style='position: absolute; top: 10px; right: 10px; font-weight: bold;'>{html.escape(str(entry['category']))}</div>"
        )
    for heading, fields in (
        ("Word", _WORD_FIELDS),
        ("Translations", _TRANSLATION_FIELDS),
        ("Expressions", _EXPRESSION_FIELDS),
    ):
        parts.append(f"<h3>{heading}</h3><ul>")
        parts.append(_render_items(entry, fields))
        parts.append("</ul>")
    parts.append("</div>")
    return ''.join(parts)


def search_dictionary(search_term: str,
                      language: str,
                      exact_match: bool,
                      word_match: bool,
                      contains: bool,
                      starts_with: bool,
                      ends_with: bool) -> str:
    """Search the `lexie` table and return the matches as an HTML fragment.

    Args:
        search_term: Text typed by the user; surrounding whitespace is ignored.
        language: ``"Amazigh"``, ``"Arabic"`` or ``"French"``; selects which
            columns are searched and which normalization rules apply.
        exact_match: Match cells equal to the term (priority 1).
        word_match: Match cells containing the term as a space-delimited
            word (priority 2).
        contains: Match cells containing the term anywhere (priority 3).
        starts_with: Match cells starting with the term (priority 4).
        ends_with: Match cells ending with the term (priority 5).

    Returns:
        HTML with one card per matching row, ordered by the best priority at
        which the row matched, or a short ``<p>`` message when the input is
        incomplete or nothing matches.

    Raises:
        sqlite3.Error: If the database cannot be queried. The connection is
            closed before the error propagates.
    """
    # Validate everything before touching the database so that rejected
    # requests never open a connection.
    if not search_term or search_term.isspace():
        return "<p>Please enter a search term</p>"
    search_columns = _SEARCH_COLUMNS.get(language, [])
    if not search_columns:
        return "<p>Please select a language</p>"
    if not any([exact_match, word_match, contains, starts_with, ends_with]):
        return "<p>Please select at least one search option</p>"

    normalized_search = normalize_text(search_term.strip(), language)
    if not normalized_search:
        # Normalization can strip the whole term (e.g. a lone combining mark);
        # an empty pattern would otherwise match every row.
        return "<p>Please enter a search term</p>"

    def normalize_cell(value):
        # Applied by SQLite to stored values so both sides of every comparison
        # go through the same folding as the search term.
        if value is None:
            return None
        return normalize_text(value if isinstance(value, str) else str(value), language)

    matches = []
    seen_keys = set()
    column_names = []
    conn = sqlite3.connect(_DB_PATH)
    try:
        # SQLite's built-in LOWER() only folds ASCII and knows nothing about
        # diacritics, so comparisons use normalize_text via a SQL function.
        conn.create_function("norm_text", 1, normalize_cell)
        cursor = conn.cursor()
        searches = _build_searches(normalized_search, search_columns, exact_match,
                                   word_match, contains, starts_with, ends_with)
        for query, params in searches:
            cursor.execute(query, params)
            column_names = [desc[0] for desc in cursor.description]
            id_idx = column_names.index('word_id') if 'word_id' in column_names else None
            for row in cursor.fetchall():
                # De-duplicate on word_id; fall back to the whole row when the
                # table has no such column.
                key = row[id_idx] if id_idx is not None else row
                if key not in seen_keys:
                    seen_keys.add(key)
                    matches.append(row)
    finally:
        conn.close()

    if not matches:
        return "<p>No results found</p>"

    # Searches run in ascending priority order, so `matches` is already sorted
    # with the best matches first.
    cards = ''.join(_render_entry(dict(zip(column_names, row))) for row in matches)
    return f"<div style='font-family: Arial, sans-serif;'>{cards}</div>"
# Gradio UI: a scale=1 column holding the search controls and a scale=3
# column holding the rendered results, side by side in one row.
with gr.Blocks(title="Dictionary Search") as demo:
    gr.Markdown("# Dictionary Search")

    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter search term...",
            )
            search_button = gr.Button("Search")

            gr.Markdown("### Language Options")
            language = gr.Radio(
                choices=["Amazigh", "Arabic", "French"],
                label="Select Language",
                value="Arabic",
            )

            gr.Markdown("### Search Options")
            exact_match = gr.Checkbox(value=True, label="Exact Match (whole cell)")
            word_match = gr.Checkbox(value=True, label="Exact Word Match (within cell)")
            contains = gr.Checkbox(value=True, label="Contains")
            starts_with = gr.Checkbox(value=False, label="Starts With")
            ends_with = gr.Checkbox(value=False, label="Ends With")

        with gr.Column(scale=3):
            output = gr.HTML(label="Results")

    # Pressing Enter in the textbox and clicking the button run the same search.
    for register in (search_input.submit, search_button.click):
        register(
            search_dictionary,
            inputs=[search_input, language, exact_match, word_match,
                    contains, starts_with, ends_with],
            outputs=output,
        )

demo.launch()