abdelhaqueidali committed on
Commit
58d5f4a
·
verified ·
1 Parent(s): d843ff1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -0
app.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sqlite3
3
+ import html
4
+ import re
5
+ import unicodedata
6
+ from typing import List, Dict
7
+
8
def normalize_text(text: str, language: str) -> str:
    """Return *text* folded for accent- and variant-insensitive comparison.

    The text is lowercased and NFKD-normalized, then language-specific
    folding is applied:

    * ``"Arabic"``: alif variants, ya/alif maqsura and ta marbuta/ha are
      unified and combining diacritics (U+064B-U+065F, U+0670) are removed.
    * ``"French"``: all combining marks are dropped, leaving base letters.
    * ``"Amazigh"``: ⵕ is treated as ⵔ and the labialization mark ⵯ is
      removed.

    Any other ``language`` value gets only the lowercase + NFKD step.
    Falsy input (``""`` or ``None``) is returned unchanged.
    """
    if not text:
        return text

    text = text.lower()

    if language == "Amazigh":
        # These substitutions must run BEFORE NFKD: the labialization mark
        # ⵯ (U+2D6F) has a compatibility decomposition to ⵡ (U+2D61), so
        # after NFKD there is no ⵯ left to strip and it would survive as a
        # full letter.
        text = text.replace('ⵕ', 'ⵔ')  # Treat ⵕ the same as ⵔ
        text = text.replace('ⵯ', '')   # Make character ⵯ unnecessary

    text = unicodedata.normalize('NFKD', text)

    if language == "Arabic":
        # NOTE: NFKD has already split hamza/madda alifs into a bare alif
        # plus a combining mark (U+0653-U+0655); the diacritic range below
        # strips those marks. The alif class is kept for readability.
        text = re.sub(r'[إأآا]', 'ا', text)  # Normalize alifs
        text = re.sub(r'[ىي]', 'ي', text)    # Normalize ya
        text = re.sub(r'[ةه]', 'ه', text)    # Normalize ta marbuta and ha
        # Remove Arabic diacritics (fatha, kasra, damma, etc.)
        text = re.sub(r'[\u064B-\u065F\u0670]', '', text)

    elif language == "French":
        # Drop combining marks so accented letters compare equal to their
        # base letters (the text is already decomposed by NFKD above).
        text = ''.join(c for c in unicodedata.normalize('NFD', text)
                       if not unicodedata.combining(c))

    return text
36
+
37
# Columns of the ``lexie`` table searched for each interface language.
_SEARCH_COLUMNS = {
    "Amazigh": ["word", "latin", "construct", "plural", "acc", "accneg", "inacc",
                "variante", "feminine", "fem_construct", "fem_plural",
                "fem_plural_construct", "exp_zgh"],
    "Arabic": ["arabic", "exp_ara", "mean_ar"],
    "French": ["french", "exp_fra"],
}

# (column, label) pairs rendered in each section of a result card.
_WORD_FIELDS = [
    ('word', 'Word'), ('latin', 'Latin'), ('construct', 'Construct'),
    ('plural', 'Plural'), ('acc', 'Accusative'), ('accneg', 'Negative Accusative'),
    ('inacc', 'Inaccusative'), ('variante', 'Variant'), ('feminine', 'Feminine'),
    ('fem_construct', 'Feminine Construct'), ('fem_plural', 'Feminine Plural'),
    ('fem_plural_construct', 'Feminine Plural Construct'),
]
_TRANSLATION_FIELDS = [
    ('french', 'French'), ('arabic', 'Arabic'), ('mean_ar', 'Arabic Meaning'),
]
_EXPRESSION_FIELDS = [
    ('exp_zgh', 'Amazigh Expression'), ('exp_fra', 'French Expression'),
    ('exp_ara', 'Arabic Expression'),
]


def _match_priority(cell, term, word_pattern,
                    exact_match, word_match, contains, starts_with, ends_with):
    """Return the best (lowest) priority 1-5 at which *cell* matches, or None."""
    if exact_match and cell == term:
        return 1
    if word_match and (cell == term or word_pattern.search(cell)):
        return 2
    if contains and term in cell:
        return 3
    if starts_with and cell.startswith(term):
        return 4
    if ends_with and cell.endswith(term):
        return 5
    return None


def _render_field_list(title, fields, entry):
    """Render one ``<h3>`` section listing the non-empty *fields* of *entry*."""
    items = "".join(
        f"<li><strong>{label}:</strong> {html.escape(str(entry[field]))}</li>"
        for field, label in fields
        if entry.get(field)
    )
    return f"<h3>{title}</h3><ul>{items}</ul>"


def _render_entry(entry):
    """Render a single dictionary row (column name -> value) as an HTML card."""
    parts = ["<div style='border: 1px solid #ccc; margin: 10px; padding: 15px; position: relative;'>"]
    if entry.get('source'):
        parts.append(
            f"<div style='text-align: center; font-style: italic;'>{html.escape(str(entry['source']))}</div>"
        )
    if entry.get('category'):
        parts.append(
            f"<div style='position: absolute; top: 10px; right: 10px; font-weight: bold;'>{html.escape(str(entry['category']))}</div>"
        )
    parts.append(_render_field_list("Word", _WORD_FIELDS, entry))
    parts.append(_render_field_list("Translations", _TRANSLATION_FIELDS, entry))
    parts.append(_render_field_list("Expressions", _EXPRESSION_FIELDS, entry))
    parts.append("</div>")
    return "".join(parts)


def search_dictionary(search_term: str,
                      language: str,
                      exact_match: bool,
                      word_match: bool,
                      contains: bool,
                      starts_with: bool,
                      ends_with: bool) -> str:
    """Search the ``lexie`` table and return the matches as an HTML string.

    Every row of ``lexie`` (in ``asawal_amqran.db``) is loaded and compared in
    Python, because both the search term and each cell are passed through
    ``normalize_text`` before matching. Only the columns associated with
    *language* are searched.

    Enabled match modes are ranked 1 (exact cell) to 5 (ends with); each row
    is ranked by the best match found in any searched column, and results are
    ordered by that rank, keeping table order within a rank. A given
    ``word_id`` is reported at most once.

    Args:
        search_term: Text typed by the user.
        language: ``"Amazigh"``, ``"Arabic"`` or ``"French"``.
        exact_match: Match when the whole cell equals the term (priority 1).
        word_match: Match the term as a whole word inside the cell (priority 2).
        contains: Match the term anywhere in the cell (priority 3).
        starts_with: Match cells starting with the term (priority 4).
        ends_with: Match cells ending with the term (priority 5).

    Returns:
        An HTML fragment: either a ``<p>`` message (missing input, no results)
        or a ``<div>`` containing one card per matching row. All database
        values are HTML-escaped.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    # Validate everything before touching the database so that no connection
    # is opened (and left open) for a request that cannot be served.
    if not search_term or search_term.isspace():
        return "<p>Please enter a search term</p>"

    search_columns = _SEARCH_COLUMNS.get(language, [])
    if not search_columns:
        return "<p>Please select a language</p>"

    flags = (exact_match, word_match, contains, starts_with, ends_with)
    if not any(flags):
        return "<p>Please select at least one search option</p>"

    # Normalize search term according to language-specific rules
    normalized_search = normalize_text(search_term, language)
    if not normalized_search:
        # A term made only of characters that normalization strips would
        # otherwise "match" every cell via the substring tests.
        return "<p>Please enter a search term</p>"

    # Compiled once; the original rebuilt this pattern for every cell.
    word_pattern = re.compile(r'\b' + re.escape(normalized_search) + r'\b')
    # No row can rank better than the first enabled mode, so stop scanning a
    # row's columns as soon as that rank is reached.
    best_possible = next(rank for rank, on in enumerate(flags, start=1) if on)

    # Get all data and filter in Python to handle normalization properly
    conn = sqlite3.connect('asawal_amqran.db')
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM lexie")
        column_names = [desc[0] for desc in cursor.description]
        all_rows = cursor.fetchall()
    finally:
        conn.close()

    word_id_idx = column_names.index('word_id') if 'word_id' in column_names else None
    searched_idxs = [idx for idx, name in enumerate(column_names)
                     if name in search_columns]

    ranked = []
    seen_keys = set()
    for row_number, row in enumerate(all_rows):
        # De-duplicate on word_id; without that column every row is distinct.
        key = row[word_id_idx] if word_id_idx is not None else ('row', row_number)
        if key in seen_keys:
            continue

        best = None
        for idx in searched_idxs:
            cell_value = row[idx]
            if not cell_value:
                continue
            priority = _match_priority(
                normalize_text(str(cell_value), language),
                normalized_search, word_pattern, *flags,
            )
            if priority is not None and (best is None or priority < best):
                best = priority
                if best == best_possible:
                    break

        if best is not None:
            seen_keys.add(key)
            ranked.append((best, row))

    if not ranked:
        return "<p>No results found</p>"

    # Stable sort: rows keep table order within the same priority.
    ranked.sort(key=lambda item: item[0])

    cards = "".join(_render_entry(dict(zip(column_names, row))) for _, row in ranked)
    return f"<div style='font-family: Arial, sans-serif;'>{cards}</div>"
180
+
181
# Gradio interface: a search box plus language/match-mode controls on the
# left, and the HTML results produced by search_dictionary on the right.
with gr.Blocks(title="Dictionary Search") as demo:
    gr.Markdown("# Dictionary Search")

    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(label="Search Term", placeholder="Enter search term...")
            search_button = gr.Button("Search")

            gr.Markdown("### Language Options")
            language = gr.Radio(
                choices=["Amazigh", "Arabic", "French"],
                label="Select Language",
                value="Arabic"
            )

            gr.Markdown("### Search Options")
            exact_match = gr.Checkbox(label="Exact Match (whole cell)", value=True)
            word_match = gr.Checkbox(label="Exact Word Match (within cell)", value=True)
            contains = gr.Checkbox(label="Contains", value=True)
            starts_with = gr.Checkbox(label="Starts With", value=False)
            ends_with = gr.Checkbox(label="Ends With", value=False)

        with gr.Column(scale=3):
            output = gr.HTML(label="Results")

    # Component order must match search_dictionary's positional parameters.
    search_params = [search_input, language, exact_match, word_match, contains, starts_with, ends_with]
    # Pressing Enter in the textbox and clicking the button run the same search.
    search_input.submit(
        search_dictionary,
        inputs=search_params,
        outputs=output
    )
    search_button.click(
        search_dictionary,
        inputs=search_params,
        outputs=output
    )

# Only start the server when run as a script, so importing this module
# (e.g. to reuse search_dictionary) does not launch a web app.
if __name__ == "__main__":
    demo.launch()