Spaces:
Running
Running
neuralworm
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -3,340 +3,71 @@ logger = logging.getLogger(__name__)
|
|
3 |
logging.basicConfig(level=logging.INFO)
|
4 |
|
5 |
import gradio as gr
|
6 |
-
import
|
7 |
-
import bible
|
8 |
-
import quran
|
9 |
-
from utils import number_to_ordinal_word, custom_normalize, date_to_words, translate_date_to_words
|
10 |
from gematria import calculate_gematria, strip_diacritics
|
11 |
-
|
12 |
-
import pandas as pd
|
13 |
-
from deep_translator import GoogleTranslator
|
14 |
-
from gradio_calendar import Calendar
|
15 |
from datetime import datetime, timedelta
|
16 |
-
import math
|
17 |
import json
|
18 |
-
import re
|
19 |
-
import sqlite3
|
20 |
-
from collections import defaultdict
|
21 |
-
from typing import List, Tuple
|
22 |
-
|
23 |
-
# --- Constants ---
|
24 |
-
DATABASE_FILE = 'gematria.db'
|
25 |
-
MAX_PHRASE_LENGTH_LIMIT = 20
|
26 |
-
|
27 |
-
# --- Database Initialization ---
|
28 |
-
def initialize_database():
|
29 |
-
global conn
|
30 |
-
conn = sqlite3.connect(DATABASE_FILE)
|
31 |
-
cursor = conn.cursor()
|
32 |
-
cursor.execute('''
|
33 |
-
CREATE TABLE IF NOT EXISTS results (
|
34 |
-
gematria_sum INTEGER,
|
35 |
-
words TEXT,
|
36 |
-
translation TEXT,
|
37 |
-
book TEXT,
|
38 |
-
chapter INTEGER,
|
39 |
-
verse INTEGER,
|
40 |
-
phrase_length INTEGER,
|
41 |
-
word_position TEXT,
|
42 |
-
PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
|
43 |
-
)
|
44 |
-
''')
|
45 |
-
cursor.execute('''
|
46 |
-
CREATE INDEX IF NOT EXISTS idx_results_gematria
|
47 |
-
ON results (gematria_sum)
|
48 |
-
''')
|
49 |
-
cursor.execute('''
|
50 |
-
CREATE TABLE IF NOT EXISTS processed_books (
|
51 |
-
book TEXT PRIMARY KEY,
|
52 |
-
max_phrase_length INTEGER
|
53 |
-
)
|
54 |
-
''')
|
55 |
-
conn.commit()
|
56 |
-
|
57 |
-
# --- Initialize Database ---
|
58 |
-
initialize_database()
|
59 |
-
|
60 |
-
# --- Helper Functions (from Network app.py) ---
|
61 |
-
def flatten_text(text: List) -> str:
|
62 |
-
if isinstance(text, list):
|
63 |
-
return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
|
64 |
-
return text
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
SELECT words, book, chapter, verse, phrase_length, word_position
|
72 |
-
FROM results
|
73 |
-
WHERE gematria_sum = ? AND phrase_length <= ?
|
74 |
-
''', (gematria_sum, max_words))
|
75 |
-
results = cursor.fetchall()
|
76 |
-
return results
|
77 |
-
|
78 |
-
def get_most_frequent_phrase(results):
|
79 |
-
phrase_counts = defaultdict(int)
|
80 |
-
for words, book, chapter, verse, phrase_length, word_position in results:
|
81 |
-
phrase_counts[words] += 1
|
82 |
-
most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None # Handle empty results
|
83 |
-
return most_frequent_phrase
|
84 |
-
|
85 |
-
# --- Functions from BOS app.py ---
|
86 |
-
def create_language_dropdown(label, default_value='en', show_label=True):
|
87 |
-
languages = GoogleTranslator(source='en', target='en').get_supported_languages(as_dict=True)
|
88 |
-
return gr.Dropdown(
|
89 |
-
choices=list(languages.keys()),
|
90 |
-
label=label,
|
91 |
-
value=default_value,
|
92 |
-
show_label=show_label
|
93 |
-
)
|
94 |
-
|
95 |
-
def calculate_gematria_sum(text, date_words):
|
96 |
-
if text or date_words:
|
97 |
-
combined_input = f"{text} {date_words}"
|
98 |
-
numbers = re.findall(r'\d+', combined_input)
|
99 |
-
text_without_numbers = re.sub(r'\d+', '', combined_input)
|
100 |
-
number_sum = sum(int(number) for number in numbers)
|
101 |
-
text_gematria = calculate_gematria(strip_diacritics(text_without_numbers))
|
102 |
-
total_sum = text_gematria + number_sum
|
103 |
-
return total_sum
|
104 |
else:
|
105 |
return None
|
106 |
|
107 |
-
def
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
torah_results = []
|
112 |
-
bible_results = []
|
113 |
-
quran_results = []
|
114 |
|
115 |
-
|
116 |
-
|
|
|
117 |
|
118 |
-
|
119 |
-
|
120 |
|
121 |
-
|
122 |
-
|
|
|
|
|
123 |
|
124 |
-
|
125 |
-
results = []
|
126 |
-
max_length = max(len(torah_results), len(bible_results), len(quran_results))
|
127 |
-
for i in range(max_length):
|
128 |
-
if i < len(torah_results):
|
129 |
-
results.append(torah_results[i])
|
130 |
-
if i < len(bible_results):
|
131 |
-
results.append(bible_results[i])
|
132 |
-
if i < len(quran_results):
|
133 |
-
results.append(quran_results[i])
|
134 |
-
else:
|
135 |
-
results = torah_results + bible_results + quran_results
|
136 |
|
137 |
return results
|
138 |
|
139 |
-
def
|
140 |
-
"""Generates the JSON dump with configuration, date range, and results, including the initial step."""
|
141 |
-
config = {
|
142 |
-
"Start Book": start,
|
143 |
-
"End Book": end,
|
144 |
-
"Step": initial_step, # Use initial_step here
|
145 |
-
"Rounds": rounds_combination,
|
146 |
-
"Target Language": tlang,
|
147 |
-
"Strip Spaces": strip_spaces,
|
148 |
-
"Strip Text in Braces": strip_in_braces,
|
149 |
-
"Strip Diacritics": strip_diacritics_chk,
|
150 |
-
"Search Phrase": search_phrase
|
151 |
-
}
|
152 |
result = {
|
153 |
-
"Configuration": config,
|
154 |
"DateRange": {
|
155 |
"StartDate": start_date.strftime("%Y-%m-%d"),
|
156 |
"EndDate": end_date.strftime("%Y-%m-%d")
|
157 |
},
|
158 |
-
"Results":
|
159 |
}
|
160 |
-
logger.info(f"Generated JSON dump: {result}")
|
161 |
return json.dumps(result, indent=4, ensure_ascii=False)
|
162 |
|
163 |
-
def download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk):
|
164 |
-
"""Downloads the JSON config file with a descriptive name."""
|
165 |
-
filename_suffix = ""
|
166 |
-
if strip_spaces:
|
167 |
-
filename_suffix += "-stSp"
|
168 |
-
if strip_in_braces:
|
169 |
-
filename_suffix += "-stBr"
|
170 |
-
if strip_diacritics_chk:
|
171 |
-
filename_suffix += "-stDc"
|
172 |
-
file_path = f"step-{step}-rounds-{rounds_combination}{filename_suffix}.json" # Include rounds in filename
|
173 |
-
with open(file_path, "w", encoding='utf-8') as file:
|
174 |
-
file.write(config_json)
|
175 |
-
logger.info(f"Downloaded JSON file to: {file_path}")
|
176 |
-
return file_path
|
177 |
-
|
178 |
-
|
179 |
# --- Main Gradio App ---
|
180 |
with gr.Blocks() as app:
|
181 |
with gr.Row():
|
182 |
-
start_date = Calendar(type="datetime", label="Start Date")
|
183 |
-
end_date = Calendar(type="datetime", label="End Date")
|
184 |
-
|
185 |
-
with gr.Row():
|
186 |
-
tlang = create_language_dropdown("Target Language for Translation", default_value='english')
|
187 |
-
date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)", default_value='english')
|
188 |
-
|
189 |
-
with gr.Row():
|
190 |
-
gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein")
|
191 |
-
gematria_result = gr.Number(label="Journal Sum")
|
192 |
-
|
193 |
-
with gr.Row():
|
194 |
-
step = gr.Number(label="Jump Width (Steps) for ELS")
|
195 |
-
float_step = gr.Number(visible=False, value=1)
|
196 |
-
half_step_btn = gr.Button("Steps / 2")
|
197 |
-
double_step_btn = gr.Button("Steps * 2")
|
198 |
-
|
199 |
-
with gr.Column():
|
200 |
-
round_x = gr.Number(label="Round (1)", value=1)
|
201 |
-
round_y = gr.Number(label="Round (2)", value=-1)
|
202 |
-
|
203 |
-
rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
|
204 |
-
|
205 |
-
with gr.Row():
|
206 |
-
include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
|
207 |
-
include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
|
208 |
-
include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
|
209 |
-
merge_results_chk = gr.Checkbox(label="Merge Results (Torah-Bible-Quran)", value=True)
|
210 |
|
211 |
-
|
212 |
-
|
213 |
-
strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
|
214 |
-
|
215 |
-
translate_btn = gr.Button("Search with ELS")
|
216 |
-
|
217 |
-
# --- Output Components ---
|
218 |
-
markdown_output = gr.Dataframe(label="ELS Results")
|
219 |
-
most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
|
220 |
-
json_output = gr.Textbox(label="JSON Configuration Output")
|
221 |
-
json_download_btn = gr.Button("Prepare .json for Download")
|
222 |
-
json_file = gr.File(label="Download Config JSON", file_count="single")
|
223 |
|
224 |
# --- Event Handlers ---
|
225 |
-
def
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
def update_rounds_combination(round_x, round_y):
|
230 |
-
return f"{int(round_x)},{int(round_y)}"
|
231 |
-
|
232 |
-
def update_step_half(float_step):
|
233 |
-
new_step = math.ceil(float_step / 2)
|
234 |
-
return new_step, float_step / 2
|
235 |
-
|
236 |
-
def update_step_double(float_step):
|
237 |
-
new_step = math.ceil(float_step * 2)
|
238 |
-
return new_step, float_step * 2
|
239 |
-
|
240 |
-
def perform_search(start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible, include_quran, gematria_text):
|
241 |
-
all_results = []
|
242 |
-
delta = timedelta(days=1)
|
243 |
-
current_date = start_date
|
244 |
-
|
245 |
-
initial_step = step # Store the initial step value
|
246 |
-
|
247 |
-
while current_date <= end_date:
|
248 |
-
date_words_output = translate_date_to_words(current_date, date_language_input)
|
249 |
-
journal_sum, _, _ = update_journal_sum(gematria_text, date_words_output)
|
250 |
-
step = journal_sum # Update step for each date
|
251 |
-
|
252 |
-
els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible, include_quran)
|
253 |
-
|
254 |
-
# --- Network Search Integration ---
|
255 |
-
updated_els_results = []
|
256 |
-
texts_to_translate = [] # Collect texts for batch translation
|
257 |
-
|
258 |
-
for result in els_results:
|
259 |
-
logger.debug(f"Processing result: {result}") # Add debugging log
|
260 |
-
try:
|
261 |
-
gematria_sum = calculate_gematria(result['result_text'])
|
262 |
-
texts_to_translate.append(result['result_text']) # Add text for translation
|
263 |
-
except KeyError as e:
|
264 |
-
print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
|
265 |
-
continue
|
266 |
-
|
267 |
-
max_words = len(result['result_text'].split())
|
268 |
-
matching_phrases = search_gematria_in_db(gematria_sum, max_words)
|
269 |
-
|
270 |
-
# Iteratively increase max_words if no results are found
|
271 |
-
max_words_limit = 20 # Set a limit for max_words
|
272 |
-
while not matching_phrases and max_words < max_words_limit:
|
273 |
-
max_words += 1
|
274 |
-
matching_phrases = search_gematria_in_db(gematria_sum, max_words)
|
275 |
-
|
276 |
-
# Find most frequent phrase or first phrase with lowest word count
|
277 |
-
if matching_phrases:
|
278 |
-
most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
|
279 |
-
else:
|
280 |
-
# Sort initial results by word count and take the first phrase
|
281 |
-
sorted_results = sorted(search_gematria_in_db(gematria_sum, max_words_limit), key=lambda x: len(x[0].split()))
|
282 |
-
most_frequent_phrase = sorted_results[0][0] if sorted_results else ""
|
283 |
-
|
284 |
-
# Add most frequent phrase, date, and date_words to the result dictionary
|
285 |
-
result['Most Frequent Phrase'] = most_frequent_phrase
|
286 |
-
result['Date'] = current_date.strftime('%Y-%m-%d')
|
287 |
-
result['Date Words'] = date_words_output
|
288 |
-
updated_els_results.append(result)
|
289 |
-
|
290 |
-
# --- Batch Translation ---
|
291 |
-
translator = GoogleTranslator(source='auto', target=tlang)
|
292 |
-
translated_texts = translator.translate_batch(texts_to_translate)
|
293 |
-
|
294 |
-
# --- Add Translated Texts to Results ---
|
295 |
-
for i, result in enumerate(updated_els_results):
|
296 |
-
result['Translated Text'] = translated_texts[i]
|
297 |
-
|
298 |
-
all_results.extend(updated_els_results)
|
299 |
-
current_date += delta
|
300 |
-
|
301 |
-
# --- Prepare Dataframe ---
|
302 |
-
df = pd.DataFrame(all_results)
|
303 |
-
df.index = range(1, len(df) + 1)
|
304 |
-
df.reset_index(inplace=True)
|
305 |
-
df.rename(columns={'index': 'Result Number'}, inplace=True)
|
306 |
-
|
307 |
-
# Find the most frequent phrase across all dates
|
308 |
-
all_phrases = [result['Most Frequent Phrase'] for result in all_results]
|
309 |
-
most_frequent_phrase = max(set(all_phrases), key=all_phrases.count) if all_phrases else ""
|
310 |
-
|
311 |
-
# Generate JSON output
|
312 |
-
search_phrase = f"{gematria_text}" # Removed date_words_output as it's now included in each result
|
313 |
-
config_json = generate_json_dump(1, 180, initial_step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, search_phrase, df, start_date, end_date)
|
314 |
-
|
315 |
-
return df, most_frequent_phrase, config_json
|
316 |
-
|
317 |
-
def handle_json_download(config_json, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk):
|
318 |
-
"""Handles the download of the JSON config file."""
|
319 |
-
return download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk)
|
320 |
-
|
321 |
|
322 |
# --- Event Triggers ---
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
|
328 |
-
|
329 |
-
translate_btn.click(
|
330 |
-
perform_search,
|
331 |
-
inputs=[start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, merge_results_chk, include_torah_chk, include_bible_chk, include_quran_chk, gematria_text],
|
332 |
-
outputs=[markdown_output, most_frequent_phrase_output, json_output]
|
333 |
-
)
|
334 |
-
|
335 |
-
json_download_btn.click(
|
336 |
-
handle_json_download,
|
337 |
-
inputs=[json_output, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk],
|
338 |
-
outputs=[json_file]
|
339 |
)
|
340 |
|
341 |
if __name__ == "__main__":
|
342 |
-
app.launch(share=False)
|
|
|
3 |
logging.basicConfig(level=logging.INFO)
|
4 |
|
5 |
import gradio as gr
|
6 |
+
from utils import date_to_words
|
|
|
|
|
|
|
7 |
from gematria import calculate_gematria, strip_diacritics
|
|
|
|
|
|
|
|
|
8 |
from datetime import datetime, timedelta
|
|
|
9 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
# --- Helper Functions ---
|
12 |
+
def calculate_gematria_sum(text):
    """Return the gematria value of ``text`` after stripping diacritics.

    Falsy input (``None`` or an empty string) yields ``None`` instead of a
    number.
    """
    if not text:
        return None
    # Diacritics are removed first; the cleaned text is what gets scored.
    cleaned = strip_diacritics(text)
    return calculate_gematria(cleaned)
|
18 |
|
19 |
+
def perform_gematria_calculation_for_date_range(start_date, end_date):
    """Group every day from ``start_date`` through ``end_date`` by gematria sum.

    Each day is rendered to words with ``date_to_words(day, 'english')`` and
    scored with ``calculate_gematria_sum``. The returned dict maps each sum to
    a list of ``{"date": "YYYY-MM-DD", "date_words": ...}`` entries in
    chronological order. Both bounds are inclusive; the dict is empty when
    ``start_date`` is later than ``end_date``.
    """
    one_day = timedelta(days=1)
    grouped = {}
    day = start_date
    while day <= end_date:
        words = date_to_words(day, 'english')
        total = calculate_gematria_sum(words)
        # setdefault creates the bucket the first time this sum is seen.
        grouped.setdefault(total, []).append({
            "date": day.strftime('%Y-%m-%d'),
            "date_words": words,
        })
        day += one_day
    return grouped
|
39 |
|
40 |
+
def generate_json_output(results, start_date, end_date):
    """Serialize the date range and the grouped results as a JSON string.

    Args:
        results: JSON-serializable mapping of gematria sum to day entries.
        start_date: First day of the range; any object with ``strftime``,
            or ``None``.
        end_date: Last day of the range; any object with ``strftime``,
            or ``None``.

    Returns:
        A 4-space-indented JSON document with "DateRange" and "Results" keys.
        Non-ASCII text is written verbatim (``ensure_ascii=False``). A bound
        that is ``None`` is written as ``null`` instead of raising
        ``AttributeError``.
    """
    def _iso_day(value):
        # Tolerate an unset bound so the dump can still be produced.
        return value.strftime("%Y-%m-%d") if value is not None else None

    payload = {
        "DateRange": {
            "StartDate": _iso_day(start_date),
            "EndDate": _iso_day(end_date),
        },
        "Results": results,
    }
    return json.dumps(payload, indent=4, ensure_ascii=False)
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# --- Main Gradio App ---
|
51 |
# Gradio core has no Calendar component, so ``gr.Calendar`` raises
# AttributeError as soon as this module is loaded. The date picker is provided
# by the separate gradio_calendar package.
from gradio_calendar import Calendar

with gr.Blocks() as app:
    with gr.Row():
        # type="datetime" is requested so the handler can call strftime and
        # add timedelta to the picked values.
        start_date = Calendar(type="datetime", label="Start Date")
        end_date = Calendar(type="datetime", label="End Date")

    calculate_btn = gr.Button("Calculate Gematria for Date Range")
    json_output = gr.Textbox(label="JSON Output")

    # --- Event Handlers ---
    def perform_calculation(start_date, end_date):
        """Compute gematria for each day in the range and return it as JSON text.

        Raises:
            gr.Error: if either date is missing, so the user sees a readable
                message rather than a TypeError from comparing ``None``.
        """
        # NOTE(review): assumes an unset picker is delivered as None - confirm
        # against the gradio_calendar version in use.
        if start_date is None or end_date is None:
            raise gr.Error("Please choose both a start date and an end date.")
        results = perform_gematria_calculation_for_date_range(start_date, end_date)
        return generate_json_output(results, start_date, end_date)

    # --- Event Triggers ---
    calculate_btn.click(
        perform_calculation,
        inputs=[start_date, end_date],
        outputs=[json_output]
    )

if __name__ == "__main__":
    app.launch(share=False)
|