Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import sqlite3
|
3 |
+
import re
|
4 |
+
from unidecode import unidecode
|
5 |
+
import arabic_reshaper
|
6 |
+
|
7 |
+
# Database connection
|
8 |
+
def get_db_connection():
    """Open the dictionary database and return the connection.

    The connection's row factory is set to ``sqlite3.Row`` so that fetched
    rows can be indexed by column name as well as by position.

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for
        closing it.
    """
    connection = sqlite3.connect('asawal_amqran.db')
    # Name-based access (row['word']) is what the result formatter relies on.
    connection.row_factory = sqlite3.Row
    return connection
|
12 |
+
|
13 |
+
# Arabic normalization
|
14 |
+
def normalize_arabic(text):
    """Return *text* with alif variants unified and diacritics removed.

    Two substitutions are applied in order:

    1. every character in the first class (the hamza/wasla/madda forms of
       alif, and bare alif itself) becomes a bare alif;
    2. every diacritic mark listed in the second class is deleted.

    Args:
        text: The Arabic string to normalize.

    Returns:
        The normalized string; characters outside both classes are kept.
    """
    unified_alif = re.sub(r'[إأٱآا]', 'ا', text)
    return re.sub(r'[ًٌٍَُِْ]', '', unified_alif)
|
18 |
+
|
19 |
+
# Amazigh normalization
|
20 |
+
def normalize_amazigh(text, additional_options=False):
    """Normalize a Tifinagh search term.

    The letter 'ⵕ' is always replaced by 'ⵔ' and the mark 'ⵯ' is always
    dropped.

    Args:
        text: The term to normalize.
        additional_options: When true, each pair of identical adjacent
            characters is additionally collapsed to a single character and
            every '*' is replaced by '%' (presumably so callers can use it
            as a SQL LIKE wildcard - confirm against the caller).

    Returns:
        The normalized term.
    """
    # One pass handles both unconditional substitutions; mapping to None
    # deletes the character.
    simplified = text.translate({ord('ⵕ'): 'ⵔ', ord('ⵯ'): None})
    if not additional_options:
        return simplified

    # Collapse doubled characters first, then translate the user wildcard.
    undoubled = re.sub(r'(.)\1', r'\1', simplified)
    return undoubled.replace('*', '%')
|
27 |
+
|
28 |
+
# French normalization
|
29 |
+
def normalize_french(text):
    """Return *text* passed through ``unidecode``.

    Args:
        text: The French search term.

    Returns:
        Whatever ``unidecode`` produces for *text* (its ASCII
        transliteration, e.g. accented letters without their accents).
    """
    transliterated = unidecode(text)
    return transliterated
|
31 |
+
|
32 |
+
# Search function with dictionary-style output
|
33 |
+
def _build_search_filter(columns, search_type, query, normalized_query):
    """Build the WHERE clause and bound parameters for a dictionary search.

    Args:
        columns: Column names to search. They are interpolated into the SQL
            text, so they must come from trusted, hard-coded lists.
        search_type: ``"exact"``, ``"word"``, ``"contains"``, ``"starts"`` or
            ``"ends"``; any other value (including ``"general"``) combines
            all five strategies with OR.
        query: The search term as typed; used for exact and whole-word
            matching.
        normalized_query: The language-normalized term; used for the
            substring, prefix and suffix patterns.

    Returns:
        A ``(where_clause, params)`` tuple where ``where_clause`` is the
        OR-joined conditions and ``params`` the matching bound values.
    """
    equals = "{col} = ?"
    like = "{col} LIKE ?"
    # Padding the column value with spaces lets "% term %" also match a word
    # at the very start or end of the value, or a value that is just the word.
    padded_like = "(' ' || {col} || ' ') LIKE ?"

    strategies = {
        "exact": (equals, query),
        "word": (padded_like, f"% {query} %"),
        "contains": (like, f"%{normalized_query}%"),
        "starts": (like, f"{normalized_query}%"),
        "ends": (like, f"%{normalized_query}"),
    }
    if search_type in strategies:
        selected = [strategies[search_type]]
    else:  # general
        selected = list(strategies.values())

    conditions = []
    params = []
    for template, value in selected:
        for col in columns:
            conditions.append(template.format(col=col))
            params.append(value)
    return " OR ".join(conditions), params


def _render_entry(index, row):
    """Render one result row as a dictionary-style HTML fragment."""
    parts = [
        "<div style='margin-bottom: 20px; padding: 10px; border-bottom: 1px solid #eee;'>",
        f"<h3 style='color: #2c3e50; margin-bottom: 10px;'>Entry {index}</h3>",
    ]

    # Main word, with the Latin transcription in parentheses when present.
    if row['word']:
        headword = f"<div><strong>{row['word']}</strong>"
        if row['latin']:
            headword += f" ({row['latin']})"
        parts.append(headword + "</div>")

    # Morphological forms; empty columns are skipped.
    form_labels = (
        ('construct', 'Construct:'),
        ('plural', 'Plural:'),
        ('acc', 'Accusative:'),
        ('accneg', 'Acc. Negative:'),
        ('inacc', 'Inaccusative:'),
        ('variante', 'Variant:'),
        ('feminine', 'Feminine:'),
        ('frem_construct', 'Fem. Construct:'),
        ('fem_plural_construct', 'Fem. Pl. Construct:'),
    )
    forms = [f"{label} {row[field]}" for field, label in form_labels if row[field]]
    if forms:
        parts.append("<div style='margin-left: 20px;'>" + "<br>".join(forms) + "</div>")

    # Translations
    translations = []
    if row['french']:
        translations.append(f"French: {row['french']}")
    if row['arabic']:
        translations.append(f"Arabic: {row['arabic']}")
    if translations:
        parts.append(
            "<div style='margin-left: 20px; margin-top: 5px;'>"
            + "<br>".join(translations)
            + "</div>"
        )

    # Explanations
    explanation_labels = (
        ('exp_zgh', 'Amazigh Exp:'),
        ('exp_fra', 'French Exp:'),
        ('exp_ara', 'Arabic Exp:'),
        ('mean_ara', 'Arabic Meaning:'),
    )
    explanations = [
        f"{label} {row[field]}" for field, label in explanation_labels if row[field]
    ]
    if explanations:
        parts.append(
            "<div style='margin-left: 20px; margin-top: 5px; color: #666;'>"
            + "<br>".join(explanations)
            + "</div>"
        )

    parts.append("</div>")
    return "".join(parts)


def search_dictionary(query, search_type="general", language="amazigh", additional_options=False):
    """Search the ``lexie`` table and return the matches as HTML.

    Args:
        query: The term to look up. An empty or whitespace-only term returns
            a prompt instead of querying, because it would otherwise match
            every row.
        search_type: ``"general"``, ``"exact"``, ``"word"``, ``"contains"``,
            ``"starts"`` or ``"ends"``; unknown values behave like
            ``"general"``.
        language: ``"amazigh"`` or ``"french"``; any other value is treated
            as Arabic. Selects both the columns searched and the
            normalization applied to the term.
        additional_options: Forwarded to ``normalize_amazigh``; ignored for
            the other languages.

    Returns:
        An HTML string: one block per matching row, or an ``<h3>`` message
        when the query is empty or nothing matched.
    """
    if not query or not query.strip():
        return "<h3>Please enter a search term</h3>"

    # Prepare query based on language
    if language == "amazigh":
        columns = ['word', 'latin', 'construct', 'plural', 'acc', 'accneg',
                   'inacc', 'variante', 'feminine', 'frem_construct', 'fem_plural_construct', 'exp_zgh']
        normalized_query = normalize_amazigh(query, additional_options)
    elif language == "french":
        columns = ['french', 'exp_fra']
        normalized_query = normalize_french(query)
    else:  # arabic
        columns = ['arabic', 'exp_ara', 'mean_ara']
        normalized_query = normalize_arabic(query)

    where_clause, params = _build_search_filter(
        columns, search_type, query, normalized_query
    )
    query_sql = f"SELECT * FROM lexie WHERE {where_clause}"

    conn = get_db_connection()
    try:
        results = conn.execute(query_sql, params).fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    # Format results as dictionary-style HTML
    if not results:
        return "<h3>No results found</h3>"

    entries = "".join(
        _render_entry(position, row) for position, row in enumerate(results, 1)
    )
    return (
        "<div style='font-family: Arial, sans-serif; max-width: 800px;'>"
        + entries
        + "</div>"
    )
|
150 |
+
|
151 |
+
# Gradio interface
|
152 |
+
with gr.Blocks(title="Amazigh Dictionary") as demo:
    # Page heading.
    gr.Markdown("# Amazigh Dictionary Search")

    # Query row: the search term plus an explicit trigger button.
    with gr.Row():
        search_box = gr.Textbox(
            label="Search",
            placeholder="Enter search term and press Enter or click Search",
        )
        search_btn = gr.Button("Search")

    # How the term is matched against the columns.
    with gr.Row():
        search_type = gr.Radio(
            label="Search Type",
            value="general",
            choices=["general", "exact", "word", "contains", "starts", "ends"],
        )

    # Which language's columns are searched.
    with gr.Row():
        language = gr.Radio(
            label="Language",
            value="amazigh",
            choices=["amazigh", "french", "arabic"],
        )

    with gr.Row():
        additional_options = gr.Checkbox(
            label="Additional options (Amazigh only: normalize double letters, use * for missing letters)",
            value=False,
        )

    # Rendered search results.
    output = gr.HTML()

    # Pressing Enter in the textbox and clicking the button run the same search.
    search_inputs = [search_box, search_type, language, additional_options]
    for register in (search_box.submit, search_btn.click):
        register(search_dictionary, inputs=search_inputs, outputs=output)

demo.launch()
|