Spaces:

abdelhaqueidali
/

Asawal-Amqran-Dictionary

Running

File size: 10,510 Bytes

913bd94

import gradio as gr
import sqlite3
import unicodedata
from typing import List, Dict
import html
import re

def normalize_text(text: str, language: str) -> str:
    """Return a lowercased, language-normalized form of *text* for matching.

    * ``"Arabic"``: the alef variants 'أ', 'إ' and 'آ' are folded to the bare
      alef 'ا', then every combining mark left after NFKD decomposition is
      dropped.
    * ``"French"``: every combining mark left after NFKD decomposition is
      dropped (accents are removed).
    * ``"Amazigh"``: 'ⵕ' is folded to 'ⵔ' and 'ⵯ' is removed.
    * Any other language: the text is only lowercased.

    Falsy input (empty string, ``None``) is returned unchanged.
    """
    if not text:
        return text

    def drop_marks(value: str) -> str:
        # NFKD splits base characters from their combining marks; Unicode
        # categories starting with 'M' are the marks to discard.
        decomposed = unicodedata.normalize('NFKD', value)
        kept = [ch for ch in decomposed
                if not unicodedata.category(ch).startswith('M')]
        return ''.join(kept)

    if language == "Arabic":
        for variant in ('أ', 'إ', 'آ'):
            text = text.replace(variant, 'ا')
        text = drop_marks(text)
    elif language == "French":
        text = drop_marks(text)
    elif language == "Amazigh":
        text = text.replace('ⵕ', 'ⵔ')
        text = text.replace('ⵯ', '')

    return text.lower()

# Columns of the `lexie` table searched for each supported query language.
_SEARCH_COLUMNS = {
    "Amazigh": ["word", "latin", "construct", "plural", "acc", "accneg", "inacc",
                "variante", "feminine", "fem_construct", "fem_plural",
                "fem_plural_construct", "exp_zgh"],
    "Arabic": ["arabic", "exp_ara", "mean_ar"],
    "French": ["french", "exp_fra"],
}

# (column, label) pairs rendered in each section of a result card, in order.
_WORD_FIELDS = [
    ('word', 'Word'), ('latin', 'Latin'), ('construct', 'Construct'),
    ('plural', 'Plural'), ('acc', 'Accusative'), ('accneg', 'Negative Accusative'),
    ('inacc', 'Inaccusative'), ('variante', 'Variant'), ('feminine', 'Feminine'),
    ('fem_construct', 'Feminine Construct'), ('fem_plural', 'Feminine Plural'),
    ('fem_plural_construct', 'Feminine Plural Construct'),
]
_TRANSLATION_FIELDS = [
    ('french', 'French'), ('arabic', 'Arabic'), ('mean_ar', 'Arabic Meaning'),
]
_EXPRESSION_FIELDS = [
    ('exp_zgh', 'Amazigh Expression'), ('exp_fra', 'French Expression'),
    ('exp_ara', 'Arabic Expression'),
]


def _escape_like(term: str) -> str:
    """Escape LIKE wildcards so *term* is matched literally (ESCAPE '\\')."""
    return term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def _fetch_matches(cursor, columns: List[str], equals: List[str], likes: List[str]) -> list:
    """Return `lexie` rows where any column equals or LIKE-matches a value."""
    conditions = []
    params = []
    for col in columns:
        for value in equals:
            conditions.append(f"LOWER({col}) = ?")
            params.append(value)
        for pattern in likes:
            conditions.append(f"LOWER({col}) LIKE ? ESCAPE '\\'")
            params.append(pattern)
    cursor.execute(f"SELECT * FROM lexie WHERE {' OR '.join(conditions)}", params)
    return cursor.fetchall()


def _render_field_list(record: Dict[str, object], fields: list) -> str:
    """Render the non-empty *fields* of *record* as an HTML ``<ul>``."""
    items = [
        # str() keeps html.escape from raising on non-text column values.
        f"<li><strong>{label}:</strong> {html.escape(str(record[field]))}</li>"
        for field, label in fields
        if record.get(field)
    ]
    return "<ul>" + "".join(items) + "</ul>"


def _render_results(column_names: List[str], rows: list) -> str:
    """Render result rows as a sequence of HTML cards inside one wrapper div."""
    parts = ["<div style='font-family: Arial, sans-serif;'>"]
    for row in rows:
        record = dict(zip(column_names, row))
        parts.append("<div style='border: 1px solid #ccc; margin: 10px; padding: 15px; position: relative;'>")
        if record.get('source'):
            parts.append(f"<div style='text-align: center; font-style: italic;'>{html.escape(str(record['source']))}</div>")
        if record.get('category'):
            parts.append(f"<div style='position: absolute; top: 10px; right: 10px; font-weight: bold;'>{html.escape(str(record['category']))}</div>")
        parts.append("<h3>Word</h3>" + _render_field_list(record, _WORD_FIELDS))
        parts.append("<h3>Translations</h3>" + _render_field_list(record, _TRANSLATION_FIELDS))
        parts.append("<h3>Expressions</h3>" + _render_field_list(record, _EXPRESSION_FIELDS))
        parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)


def search_dictionary(search_term: str,
                      language: str,
                      exact_match: bool,
                      word_match: bool,
                      contains: bool,
                      starts_with: bool,
                      ends_with: bool) -> str:
    """Search the `lexie` table of ``asawal_amqran.db`` and return HTML.

    The search term is normalized with ``normalize_text`` and compared with
    ``LOWER(column)`` for the columns belonging to *language*. Each enabled
    option is one search pass; passes run in priority order and a row is kept
    only the first time it is seen, so results are ordered by best match:

    1. *exact_match*: the whole cell equals the term.
    2. *word_match*: the term is a whole space-delimited word of the cell.
    3. *contains*: the term appears anywhere in the cell.
    4. *starts_with*: the cell starts with the term.
    5. *ends_with*: the cell ends with the term.

    Args:
        search_term: Text typed by the user.
        language: ``"Amazigh"``, ``"Arabic"`` or ``"French"``.
        exact_match, word_match, contains, starts_with, ends_with: Which
            search passes to run.

    Returns:
        An HTML fragment: either result cards or a ``<p>`` message when the
        input is invalid or nothing matched.

    Raises:
        sqlite3.Error: If the database cannot be queried.
    """
    # Validate everything before opening the database so invalid requests
    # never acquire (and previously leaked) a connection.
    if not search_term or search_term.isspace():
        return "<p>Please enter a search term</p>"

    search_columns = _SEARCH_COLUMNS.get(language, [])
    if not search_columns:
        return "<p>Please select a language</p>"

    if not any((exact_match, word_match, contains, starts_with, ends_with)):
        return "<p>Please select at least one search option</p>"

    # NOTE(review): only the search term is normalized; stored values are
    # merely passed through SQLite's LOWER() — confirm the data is stored
    # in normalized form.
    normalized_search = normalize_text(search_term, language)
    # '%' and '_' typed by the user must match literally, not as wildcards.
    like_term = _escape_like(normalized_search)

    # (enabled, equality values, LIKE patterns), highest priority first.
    # The word-match patterns were previously each AND-ed with
    # NOT LIKE '%term%', which made them unsatisfiable.
    passes = [
        (exact_match, [normalized_search], []),
        (word_match, [normalized_search],
         [f"{like_term} %", f"% {like_term}", f"% {like_term} %"]),
        (contains, [], [f"%{like_term}%"]),
        (starts_with, [], [f"{like_term}%"]),
        (ends_with, [], [f"%{like_term}"]),
    ]

    results = []
    seen_keys = set()
    column_names = []

    conn = sqlite3.connect('asawal_amqran.db')
    try:
        cursor = conn.cursor()
        for enabled, equals, likes in passes:
            if not enabled:
                continue
            rows = _fetch_matches(cursor, search_columns, equals, likes)
            # Read the column names while the connection is still open.
            column_names = [desc[0] for desc in cursor.description]
            key_idx = column_names.index('word_id') if 'word_id' in column_names else None
            for row in rows:
                # De-duplicate on word_id; fall back to the whole row so
                # results are not silently dropped if the column is absent.
                key = row if key_idx is None else row[key_idx]
                if key not in seen_keys:
                    seen_keys.add(key)
                    results.append(row)
    finally:
        conn.close()

    if not results:
        return "<p>No results found</p>"

    # Rows were appended pass by pass, so they are already in priority order.
    return _render_results(column_names, results)

# Gradio UI: search controls in a narrow left column, HTML results on the right.
with gr.Blocks(title="Dictionary Search") as demo:
    gr.Markdown("# Dictionary Search")

    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(
                label="Search Term",
                placeholder="Enter search term...",
            )
            search_button = gr.Button("Search")

            gr.Markdown("### Language Options")
            language = gr.Radio(
                label="Select Language",
                choices=["Amazigh", "Arabic", "French"],
                value="Arabic",
            )

            # Each checkbox enables one search pass of search_dictionary.
            gr.Markdown("### Search Options")
            exact_match = gr.Checkbox(value=True, label="Exact Match (whole cell)")
            word_match = gr.Checkbox(value=True, label="Exact Word Match (within cell)")
            contains = gr.Checkbox(value=True, label="Contains")
            starts_with = gr.Checkbox(value=False, label="Starts With")
            ends_with = gr.Checkbox(value=False, label="Ends With")

        with gr.Column(scale=3):
            output = gr.HTML(label="Results")

    # Pressing Enter in the textbox and clicking the button run the same search.
    for register in (search_input.submit, search_button.click):
        register(
            search_dictionary,
            inputs=[search_input, language, exact_match, word_match,
                    contains, starts_with, ends_with],
            outputs=output,
        )

demo.launch()