|
import html
import re
import sqlite3
import unicodedata
from contextlib import closing
from typing import List, Dict

import gradio as gr
|
|
|
# Arabic alef-with-hamza/madda forms that are folded to the bare alef.
_ARABIC_ALEF_VARIANTS = ('أ', 'إ', 'آ')


def _strip_diacritics(text: str) -> str:
    """Return *text* NFKD-decomposed with every combining mark removed."""
    decomposed = unicodedata.normalize('NFKD', text)
    return ''.join(
        ch for ch in decomposed
        # Unicode general categories Mn/Mc/Me all start with "M".
        if not unicodedata.category(ch).startswith('M')
    )


def normalize_text(text: str, language: str) -> str:
    """Normalize text based on language-specific rules.

    Args:
        text: The text to normalize. Falsy values (``""``, ``None``) are
            returned unchanged.
        language: ``"Arabic"``, ``"French"`` or ``"Amazigh"``. Any other value
            only triggers the final lower-casing.

    Returns:
        The lower-cased text after the language-specific folding:

        * Arabic: alef variants become the bare alef, then combining marks
          are stripped.
        * French: combining marks (accents) are stripped.
        * Amazigh: 'ⵕ' is folded into 'ⵔ' and 'ⵯ' is removed.
    """
    if not text:
        return text

    if language == "Arabic":
        for variant in _ARABIC_ALEF_VARIANTS:
            text = text.replace(variant, 'ا')
        text = _strip_diacritics(text)
    elif language == "French":
        text = _strip_diacritics(text)
    elif language == "Amazigh":
        text = text.replace('ⵕ', 'ⵔ').replace('ⵯ', '')

    return text.lower()
|
|
|
# Columns searched for each language. These names are interpolated into the
# SQL text, which is safe only because they come from this fixed table and
# never from user input.
_SEARCH_COLUMNS = {
    "Amazigh": ["word", "latin", "construct", "plural", "acc", "accneg", "inacc",
                "variante", "feminine", "fem_construct", "fem_plural",
                "fem_plural_construct", "exp_zgh"],
    "Arabic": ["arabic", "exp_ara", "mean_ar"],
    "French": ["french", "exp_fra"],
}

# (heading, [(column, label), ...]) for each list rendered inside a result card.
_RESULT_SECTIONS = [
    ("Word", [
        ('word', 'Word'), ('latin', 'Latin'), ('construct', 'Construct'),
        ('plural', 'Plural'), ('acc', 'Accusative'), ('accneg', 'Negative Accusative'),
        ('inacc', 'Inaccusative'), ('variante', 'Variant'), ('feminine', 'Feminine'),
        ('fem_construct', 'Feminine Construct'), ('fem_plural', 'Feminine Plural'),
        ('fem_plural_construct', 'Feminine Plural Construct'),
    ]),
    ("Translations", [
        ('french', 'French'), ('arabic', 'Arabic'), ('mean_ar', 'Arabic Meaning'),
    ]),
    ("Expressions", [
        ('exp_zgh', 'Amazigh Expression'), ('exp_fra', 'French Expression'),
        ('exp_ara', 'Arabic Expression'),
    ]),
]


def _escape_like(term: str) -> str:
    """Escape LIKE metacharacters so *term* is matched literally (ESCAPE '\\')."""
    return term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def _build_where(columns: List[str], term: str, equals: bool, like_patterns: List[str]):
    """Return ``(where_sql, params)`` OR-ing the requested tests over *columns*."""
    clauses = []
    params = []
    for column in columns:
        if equals:
            clauses.append(f"LOWER({column}) = ?")
            params.append(term)
        for pattern in like_patterns:
            clauses.append(f"LOWER({column}) LIKE ? ESCAPE '\\'")
            params.append(pattern)
    return " OR ".join(clauses), params


def _dedupe_by_word_id(column_names: List[str], rows: list) -> list:
    """Keep the first row per ``word_id``; keep every row if that column is absent."""
    if 'word_id' not in column_names:
        return list(rows)
    id_index = column_names.index('word_id')
    seen_ids = set()
    unique_rows = []
    for row in rows:
        if row[id_index] not in seen_ids:
            seen_ids.add(row[id_index])
            unique_rows.append(row)
    return unique_rows


def _escape_value(value) -> str:
    """HTML-escape a cell value, tolerating non-string column types."""
    return html.escape(str(value))


def _render_entry(entry: Dict) -> str:
    """Render one dictionary row (column name -> value) as an HTML card."""
    parts = ["<div style='border: 1px solid #ccc; margin: 10px; padding: 15px; position: relative;'>"]

    if entry.get('source'):
        parts.append(
            "<div style='text-align: center; font-style: italic;'>"
            f"{_escape_value(entry['source'])}</div>"
        )
    if entry.get('category'):
        parts.append(
            "<div style='position: absolute; top: 10px; right: 10px; font-weight: bold;'>"
            f"{_escape_value(entry['category'])}</div>"
        )

    for heading, fields in _RESULT_SECTIONS:
        parts.append(f"<h3>{heading}</h3><ul>")
        parts.extend(
            f"<li><strong>{label}:</strong> {_escape_value(entry[field])}</li>"
            for field, label in fields
            if entry.get(field)  # skip missing columns and empty/NULL cells
        )
        parts.append("</ul>")

    parts.append("</div>")
    return "".join(parts)


def search_dictionary(search_term: str,
                      language: str,
                      exact_match: bool,
                      word_match: bool,
                      contains: bool,
                      starts_with: bool,
                      ends_with: bool) -> str:
    """Search the ``lexie`` table of ``asawal_amqran.db`` and return HTML.

    The enabled search modes are tried in a fixed order (exact cell match,
    whole-word match inside a cell, contains, starts with, ends with) and the
    first mode that yields any row supplies the whole result set.

    Args:
        search_term: Text typed by the user; surrounding whitespace is ignored.
        language: ``"Amazigh"``, ``"Arabic"`` or ``"French"``; selects which
            columns are searched and how the term is normalized.
        exact_match: Enable matching the whole cell.
        word_match: Enable matching the term as a space-delimited word.
        contains: Enable substring matching.
        starts_with: Enable prefix matching.
        ends_with: Enable suffix matching.

    Returns:
        An HTML fragment: either a one-paragraph message (empty term, unknown
        language, no option selected, no results) or one card per matching row.

    Raises:
        sqlite3.Error: If the database cannot be queried.
    """
    # Validate everything before touching the database so that no connection
    # is opened (and leaked) on the early-return paths.
    if not search_term or search_term.isspace():
        return "<p>Please enter a search term</p>"

    search_columns = _SEARCH_COLUMNS.get(language, [])
    if not search_columns:
        return "<p>Please select a language</p>"

    if not any([exact_match, word_match, contains, starts_with, ends_with]):
        return "<p>Please select at least one search option</p>"

    # NOTE(review): only the search term is normalized; stored cells are merely
    # LOWER()-ed by SQLite, so rows stored with diacritics may not match.
    term = normalize_text(search_term.strip(), language)
    like_term = _escape_like(term)

    # (enabled, also test equality, LIKE patterns), in priority order.
    modes = [
        (exact_match, True, []),
        # A word may sit at the start, the end or the middle of the cell.
        (word_match, True, [f"{like_term} %", f"% {like_term}", f"% {like_term} %"]),
        (contains, False, [f"%{like_term}%"]),
        (starts_with, False, [f"{like_term}%"]),
        (ends_with, False, [f"%{like_term}"]),
    ]

    column_names = []
    rows = []
    # sqlite3's own context manager only scopes a transaction; closing()
    # guarantees the connection is released even if a query raises.
    with closing(sqlite3.connect('asawal_amqran.db')) as conn:
        cursor = conn.cursor()
        for enabled, equals, like_patterns in modes:
            if not enabled:
                continue
            where_sql, params = _build_where(search_columns, term, equals, like_patterns)
            cursor.execute(f"SELECT * FROM lexie WHERE {where_sql}", params)
            # Capture the column names while the cursor is still live.
            column_names = [description[0] for description in cursor.description]
            rows = _dedupe_by_word_id(column_names, cursor.fetchall())
            if rows:
                break  # the first mode that finds something wins

    if not rows:
        return "<p>No results found</p>"

    cards = [_render_entry(dict(zip(column_names, row))) for row in rows]
    return "<div style='font-family: Arial, sans-serif;'>" + "".join(cards) + "</div>"
|
|
|
|
|
# Build the Gradio interface: a narrow control column (term, language and
# search-mode toggles) next to a wide column that shows the HTML results.
with gr.Blocks(title="Dictionary Search") as demo:
    gr.Markdown("# Dictionary Search")

    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(label="Search Term", placeholder="Enter search term...")
            search_button = gr.Button("Search")

            gr.Markdown("### Language Options")
            language = gr.Radio(
                choices=["Amazigh", "Arabic", "French"],
                label="Select Language",
                value="Arabic",
            )

            gr.Markdown("### Search Options")
            exact_match = gr.Checkbox(label="Exact Match (whole cell)", value=True)
            word_match = gr.Checkbox(label="Exact Word Match (within cell)", value=True)
            contains = gr.Checkbox(label="Contains", value=True)
            starts_with = gr.Checkbox(label="Starts With", value=False)
            ends_with = gr.Checkbox(label="Ends With", value=False)

        with gr.Column(scale=3):
            output = gr.HTML(label="Results")

    # Pressing Enter in the textbox and clicking the button run the same
    # search with the same inputs, in search_dictionary's parameter order.
    search_inputs = (search_input, language, exact_match, word_match,
                     contains, starts_with, ends_with)
    for bind_event in (search_input.submit, search_button.click):
        bind_event(
            search_dictionary,
            inputs=list(search_inputs),
            outputs=output,
        )

demo.launch()