neuralworm committed · Commit cfc0aba · Parent(s): c6dfbe1 · "initial commit"
app.py CHANGED
@@ -28,6 +28,7 @@ from fuzzywuzzy import fuzz
import calendar
import translation_utils
import hashlib

translation_utils.create_translation_table()

@@ -166,110 +167,29 @@ def calculate_gematria_sum(text, date_words):
    else:
        return None

-def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
-    if step == 0 or rounds_combination == "0,0":
-        return None
-
-    results = {}
-    length = 0
-
-    selected_language_long = tlang  # From the Gradio dropdown (long form)
-    tlang = LANGUAGES_SUPPORTED.get(selected_language_long)  # Get the short code.
-    if tlang is None:  # Handle unsupported languages
-        tlang = "en"
-        logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
-
-    if include_torah:
-        logger.debug(f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
-        results["Torah"] = cached_process_json_files(torah.process_json_files, 1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
-    else:
-        results["Torah"] = []
-
-    if include_bible:
-        results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
-    else:
-        results["Bible"] = []

-
-
-
-    results

-
-
-
-

-
-
-
-

-    return
-
-
-def add_24h_projection(results_dict):  # Now takes a dictionary of results
-    for book_name, results in results_dict.items():  # Iterate per book
-        num_results = len(results)
-        if num_results > 0:
-            time_interval = timedelta(minutes=24 * 60 / num_results)
-            current_time = datetime.min.time()
-            for i in range(num_results):
-                next_time = (datetime.combine(datetime.min, current_time) + time_interval).time()
-                time_range_str = f"{current_time.strftime('%H:%M')}-{next_time.strftime('%H:%M')}"
-                results[i]['24h Projection'] = time_range_str
-                current_time = next_time
-    return results_dict
-
-
-def add_monthly_projection(results_dict, selected_date):
-    if selected_date is None:
-        return results_dict  # Return if no date is selected
-
-    for book_name, results in results_dict.items():  # Iterate per book
-        num_results = len(results)
-        if num_results > 0:
-            days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
-            total_seconds = (days_in_month - 1) * 24 * 3600
-            seconds_interval = total_seconds / num_results
-            start_datetime = datetime(selected_date.year, selected_date.month, 1)
-            current_datetime = start_datetime
-
-
-            for i in range(num_results):
-                next_datetime = current_datetime + timedelta(seconds=seconds_interval)
-                current_date = current_datetime.date()  # Moved assignment inside loop
-                next_date = next_datetime.date()
-                date_range_str = f"{current_date.strftime('%h %d')} - {next_date.strftime('%h %d')}"
-                results[i]['Monthly Projection'] = date_range_str
-                current_datetime = next_datetime  # Add this
-                current_date = next_datetime.date()  # Add this too
-    return results_dict
-
-
-def add_yearly_projection(results_dict, selected_date):  # Correct name, handle dictionary input
-    if selected_date is None:
-        return results_dict  # Return if no date is selected
-
-    for book_name, results in results_dict.items():  # Iterate per book
-        num_results = len(results)
-        if num_results > 0:
-            days_in_year = 366 if calendar.isleap(selected_date.year) else 365
-            total_seconds = (days_in_year - 1) * 24 * 3600
-            seconds_interval = total_seconds / num_results
-            start_datetime = datetime(selected_date.year, 1, 1)
-            current_datetime = start_datetime
-
-
-            for i in range(num_results):
-                next_datetime = current_datetime + timedelta(seconds=seconds_interval)
-                current_date = current_datetime.date()  # Move assignment inside loop
-                next_date = next_datetime.date()
-                date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
-                results[i]['Yearly Projection'] = date_range_str
-                current_datetime = next_datetime  # Update current datetime for next iteration
-
-    return results_dict


def sort_results(results):
@@ -290,24 +210,17 @@ with gr.Blocks() as app:
    with gr.Column():
        with gr.Row():
            tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
-
            use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
            use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
            use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
            date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)", default_value='english')
        with gr.Row():
            gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein Mileva Marity-Einstein")
-            date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
-            gematria_result = gr.Number(label="Journal Sum")
-        #with gr.Row():


        with gr.Row():
-            step = gr.Number(label="Jump Width (Steps) for ELS")
-            float_step = gr.Number(visible=False, value=1)
-            half_step_btn = gr.Button("Steps / 2")
-            double_step_btn = gr.Button("Steps * 2")
-
            with gr.Column():
                round_x = gr.Number(label="Round (1)", value=1)
                round_y = gr.Number(label="Round (2)", value=-1)
@@ -335,48 +248,9 @@ with gr.Blocks() as app:

    # --- Event Handlers ---

-    def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
-        if selected_date is None:
-            return ""
-
-        if not use_year and not use_month and not use_day:
-            return translate_date_to_words(selected_date, date_language_input)
-
-        year = selected_date.year if use_year else None
-        month = selected_date.month if use_month else None
-        day = selected_date.day if use_day else None
-
-        if year is not None and month is not None and day is not None:
-            date_obj = selected_date
-        elif year is not None and month is not None:
-            date_obj = str(f"{year}-{month}")
-        elif year is not None:
-            date_obj = str(f"{year}")
-        else:  # Return empty string if no date components are selected
-            return ""
-
-
-        date_in_words = date_to_words(date_obj)
-
-
-        translator = GoogleTranslator(source='auto', target=date_language_input)
-        translated_date_words = translator.translate(date_in_words)
-        return custom_normalize(translated_date_words)
-
-    def update_journal_sum(gematria_text, date_words_output):
-        sum_value = calculate_gematria_sum(gematria_text, date_words_output)
-        return sum_value, sum_value, sum_value
-
    def update_rounds_combination(round_x, round_y):
        return f"{int(round_x)},{int(round_y)}"

-    def update_step_half(float_step):
-        new_step = math.ceil(float_step / 2)
-        return new_step, float_step / 2
-
-    def update_step_double(float_step):
-        new_step = math.ceil(float_step * 2)
-        return new_step, float_step * 2


    def find_closest_phrase(target_phrase, phrases):
@@ -401,98 +275,146 @@ with gr.Blocks() as app:
        logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")  # Log the best match
        return best_match

-    def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text, date_words_output, selected_date):
-        # Inside perform_search
-        els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
-                                         strip_diacritics_chk, include_torah, include_bible, include_quran,
-                                         include_hindu,
-                                         include_tripitaka)

-
        most_frequent_phrases = {}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

        # --- Batch Translation ---
        selected_language_long = tlang
        tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
        if tlang_short is None:
            tlang_short = "en"
-            logger.warning(

-        # Prepare lists for batch translation, including source language
        phrases_to_translate = []
-        phrases_source_langs = []
        results_to_translate = []
-        results_source_langs = []
        for result in combined_and_sorted_results:
            phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
            phrases_source_langs.append(result.get("source_language", "auto"))
            results_to_translate.append(result.get('result_text', ''))
            results_source_langs.append(result.get("source_language", "auto"))

-
        translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short, phrases_source_langs)
        translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short, results_source_langs)

-
        for i, result in enumerate(combined_and_sorted_results):
-            result['translated_text'] = translated_result_texts.get(results_to_translate[i],
-            result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i],

-
-
-        updated_els_results = add_monthly_projection(updated_els_results, selected_date)  # Call correct functions with correct params
-        updated_els_results = add_yearly_projection(updated_els_results, selected_date)

-
-
-
-

        df = pd.DataFrame(combined_and_sorted_results)
        df.index = range(1, len(df) + 1)
        df.reset_index(inplace=True)
        df.rename(columns={'index': 'Result Number'}, inplace=True)

-        for i, result in enumerate(combined_and_sorted_results):  # Iterate through the combined list
-            result['Result Number'] = i + 1
-
        search_config = {
-            "
-            "rounds_combination": rounds_combination,
            "target_language": tlang,
            "strip_spaces": strip_spaces,
            "strip_in_braces": strip_in_braces,
@@ -503,72 +425,47 @@ with gr.Blocks() as app:
            "include_hindu": include_hindu,
            "include_tripitaka": include_tripitaka,
            "gematria_text": gematria_text,
-            "
        }

        output_data = {
            "search_configuration": search_config,
-            "results": combined_and_sorted_results
        }

        json_data = output_data
-
-        # --- Return results ---
        combined_most_frequent = "\n".join(
-            f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items()
-        return df, combined_most_frequent, json_data


    # --- Event Triggers ---
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)

-    selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year], outputs=[date_words_output])
-    date_language_input.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year], outputs=[date_words_output])

-
-

-    half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
-    double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])

    translate_btn.click(
        perform_search,
-        inputs=[
        outputs=[markdown_output, most_frequent_phrase_output, json_output]
    )

-    app.load(
-        update_date_words,
-        inputs=[selected_date, date_language_input, use_day, use_month, use_year],  # Include all 5 inputs
-        outputs=[date_words_output]
-    )
-
-    use_day.change(
-        update_date_words,
-        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
-        outputs=[date_words_output]
-    )
-    use_month.change(
-        update_date_words,
-        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
-        outputs=[date_words_output]
-    )
-    use_year.change(
-        update_date_words,
-        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
-        outputs=[date_words_output]
-    )
-
-    def checkbox_behavior(use_day_value, use_month_value):
-        if use_day_value:  # Tick month and year automatically when day is ticked.
-            return True, True
-
-        return use_month_value, True  # return month value unchanged and automatically tick year if month is checked
-
-    use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
-    use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])  # No need for use_day here, day won't be changed by month


if __name__ == "__main__":
@@ -28,6 +28,7 @@ from fuzzywuzzy import fuzz
import calendar
import translation_utils
import hashlib
+import time

translation_utils.create_translation_table()

@@ -166,110 +167,29 @@ def calculate_gematria_sum(text, date_words):
    else:
        return None


+def add_24h_projection(results_dict, date_str):  # Add date_str as parameter
+    combined_results = []
+    for book_name, results in results_dict.items():
+        combined_results.extend(results)

+    num_results = len(combined_results)
+    if num_results > 0:
+        time_interval = timedelta(minutes=24 * 60 / num_results)
+        current_datetime = datetime.combine(datetime.today(), datetime.min.time())
+        for i in range(num_results):
+            next_datetime = current_datetime + time_interval
+            time_range_str = f"{current_datetime.strftime('%H:%M')}-{next_datetime.strftime('%H:%M')}"
+            combined_results[i]['24h Projection'] = time_range_str
+            current_datetime = next_datetime

+    # Re-organize results back into their book dictionaries
+    reorganized_results = defaultdict(list)
+    for result in combined_results:
+        book_name = result.get('book', 'Unknown')  # Get book name to reorganize
+        reorganized_results[book_name].append(result)

+    return reorganized_results


def sort_results(results):
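As an aside on the hunk above: the new add_24h_projection flattens every book's results into one list and then assigns each result an equal slice of the 24-hour day as an HH:MM-HH:MM label. Below is a minimal, standalone sketch of just that partitioning step; partition_day is a hypothetical helper written for illustration, not part of app.py, and it assumes results are plain dicts.

from datetime import datetime, timedelta

def partition_day(num_results: int) -> list:
    # Split 24 hours into num_results equal HH:MM-HH:MM ranges,
    # mirroring the interval arithmetic in the diff's add_24h_projection.
    if num_results <= 0:
        return []
    interval = timedelta(minutes=24 * 60 / num_results)
    current = datetime.combine(datetime.today(), datetime.min.time())
    ranges = []
    for _ in range(num_results):
        nxt = current + interval
        ranges.append(f"{current.strftime('%H:%M')}-{nxt.strftime('%H:%M')}")
        current = nxt
    return ranges

print(partition_day(3))  # ['00:00-08:00', '08:00-16:00', '16:00-00:00']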
@@ -290,24 +210,17 @@ with gr.Blocks() as app:
    with gr.Column():
        with gr.Row():
            tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
+            start_date_range = Calendar(type="datetime", label="Start Date for ELS")
+            end_date_range = Calendar(type="datetime", label="End Date for ELS")
            use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
            use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
            use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
            date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)", default_value='english')
        with gr.Row():
            gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein Mileva Marity-Einstein")


        with gr.Row():
            with gr.Column():
                round_x = gr.Number(label="Round (1)", value=1)
                round_y = gr.Number(label="Round (2)", value=-1)
@@ -335,48 +248,9 @@ with gr.Blocks() as app:

    # --- Event Handlers ---

    def update_rounds_combination(round_x, round_y):
        return f"{int(round_x)},{int(round_y)}"



    def find_closest_phrase(target_phrase, phrases):

@@ -401,98 +275,146 @@ with gr.Blocks() as app:
        logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")  # Log the best match
        return best_match

+    def perform_search(rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text, start_date, end_date, date_language_input):
+        overall_start_time = time.time()
+
+        combined_and_sorted_results = []
        most_frequent_phrases = {}
+
+        current_date = start_date
+        while current_date <= end_date:
+            date_str = current_date.strftime("%Y-%m-%d")
+            date_words = translate_date_to_words(current_date, date_language_input)
+
+            step = calculate_gematria_sum(gematria_text, date_words)
+            logger.debug(f"Calculated step for {date_str}: {step}")
+
+            if step != 0 and rounds_combination != "0,0":
+                # Process for the current date
+                els_results_single_date = {}
+                if include_torah:
+                    els_results_single_date["Torah"] = cached_process_json_files(torah.process_json_files, 1, 39, step,
+                                                                                 rounds_combination, 0, tlang, strip_spaces,
+                                                                                 strip_in_braces, strip_diacritics_chk)
+                if include_bible:
+                    els_results_single_date["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step,
+                                                                                 rounds_combination, 0, tlang, strip_spaces,
+                                                                                 strip_in_braces, strip_diacritics_chk)
+                if include_quran:
+                    els_results_single_date["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step,
+                                                                                 rounds_combination, 0, tlang, strip_spaces,
+                                                                                 strip_in_braces, strip_diacritics_chk)
+                if include_hindu:
+                    els_results_single_date["Rig Veda"] = cached_process_json_files(hindu.process_json_files, 1, 10, step,
+                                                                                    rounds_combination, 0, tlang, False,
+                                                                                    strip_in_braces, strip_diacritics_chk)
+                if include_tripitaka:
+                    els_results_single_date["Tripitaka"] = cached_process_json_files(tripitaka.process_json_files, 1, 52,
+                                                                                     step, rounds_combination, 0, tlang,
+                                                                                     strip_spaces, strip_in_braces,
+                                                                                     strip_diacritics_chk)
+
+                # Add 24h projection *before* iterating through books
+                els_results_single_date = add_24h_projection(els_results_single_date, date_str)
+
+                for book_name, book_results in els_results_single_date.items():
+                    logger.debug(f"Processing results for book: {book_name}")
+                    if book_results:
+                        most_frequent_phrases[book_name] = ""
+                        for result in book_results:
+                            try:
+                                gematria_sum = calculate_gematria(result['result_text'])
+                                max_words = len(result['result_text'].split())
+                                matching_phrases = search_gematria_in_db(gematria_sum, max_words)
+                                max_words_limit = 20
+                                while not matching_phrases and max_words < max_words_limit:
+                                    max_words += 1
+                                    matching_phrases = search_gematria_in_db(gematria_sum, max_words)
+
+                                if matching_phrases:
+                                    most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
+                                    most_frequent_phrases[book_name] = most_frequent_phrase
+                                else:
+                                    closest_phrase = find_closest_phrase(result['result_text'],
+                                                                         search_gematria_in_db(gematria_sum,
+                                                                                               max_words_limit))
+                                    most_frequent_phrases[book_name] = closest_phrase or ""
+
+                                result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
+                                result['date'] = date_str
+                                if 'book' in result:
+                                    if isinstance(result['book'], int):
+                                        result['book'] = f"{book_name} {result['book']}."
+
+                            except KeyError as e:
+                                print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
+                                continue
+
+
+                        combined_and_sorted_results.extend(book_results)
+
+
+
+            current_date += timedelta(days=1)

        # --- Batch Translation ---
+        translation_start_time = time.time()
        selected_language_long = tlang
        tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
        if tlang_short is None:
            tlang_short = "en"
+            logger.warning(
+                f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")

        phrases_to_translate = []
+        phrases_source_langs = []
        results_to_translate = []
+        results_source_langs = []
+
        for result in combined_and_sorted_results:
            phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
            phrases_source_langs.append(result.get("source_language", "auto"))
            results_to_translate.append(result.get('result_text', ''))
            results_source_langs.append(result.get("source_language", "auto"))

        translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short, phrases_source_langs)
        translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short, results_source_langs)

        for i, result in enumerate(combined_and_sorted_results):
+            result['translated_text'] = translated_result_texts.get(results_to_translate[i], result.get('result_text', ''))
+            result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i],
+                                                                               result.get('Most Frequent Phrase', ''))

+        translation_end_time = time.time()
+        logger.debug(f"Batch translation took: {translation_end_time - translation_start_time} seconds")

+        # --- Time projections ---
+        time_projections_start_time = time.time()
+        for result in combined_and_sorted_results:
+            selected_date = datetime.strptime(result['date'], '%Y-%m-%d')
+            book_name = result.get('book', 'Unknown')
+            projection_input = {book_name: [result]}
+
+            updated_date_results = add_24h_projection(projection_input, result['date'])
+
+            result.update(updated_date_results[book_name][0])
+
+        combined_and_sorted_results = sort_results(combined_and_sorted_results)
+        time_projections_end_time = time.time()
+        logger.debug(
+            f"Time projections took: {time_projections_end_time - time_projections_start_time} seconds")
+
+        # --- Dataframe and JSON creation ---
+        dataframe_json_start_time = time.time()

        df = pd.DataFrame(combined_and_sorted_results)
        df.index = range(1, len(df) + 1)
        df.reset_index(inplace=True)
        df.rename(columns={'index': 'Result Number'}, inplace=True)

        search_config = {
+            "rounds_combination": rounds_combination,  # No more 'step'
            "target_language": tlang,
            "strip_spaces": strip_spaces,
            "strip_in_braces": strip_in_braces,

@@ -503,72 +425,47 @@ with gr.Blocks() as app:
            "include_hindu": include_hindu,
            "include_tripitaka": include_tripitaka,
            "gematria_text": gematria_text,
+            "start_date": start_date.strftime("%Y-%m-%d"),
+            "end_date": end_date.strftime("%Y-%m-%d")
        }

        output_data = {
            "search_configuration": search_config,
+            "results": combined_and_sorted_results
        }

        json_data = output_data
        combined_most_frequent = "\n".join(
+            f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items() if phrase)

+        dataframe_json_end_time = time.time()
+        logger.debug(
+            f"Dataframe and JSON creation took: {dataframe_json_end_time - dataframe_json_start_time} seconds")

+        overall_end_time = time.time()
+        logger.debug(f"Overall process took: {overall_end_time - overall_start_time} seconds")
+
+        return df, combined_most_frequent, json_data

    # --- Event Triggers ---
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)


+    def update_rounds_combination(round_x, round_y):
+        return f"{int(round_x)},{int(round_y)}"
+
+
+    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
+    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)

    translate_btn.click(
        perform_search,
+        inputs=[rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk, include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text, start_date_range, end_date_range, date_language_input],
        outputs=[markdown_output, most_frequent_phrase_output, json_output]
    )



if __name__ == "__main__":
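For orientation, the core control-flow change in perform_search is the move from a single user-supplied ELS step to one search per calendar day between start_date_range and end_date_range, with each day's step derived from the gematria sum of the name plus that day's date words. Below is a minimal standalone sketch of that loop; words_for_date and gematria_sum are hypothetical stand-ins for the app's translate_date_to_words and calculate_gematria_sum, not the Space's actual implementations.

from datetime import date, timedelta

def words_for_date(d: date) -> str:
    # Hypothetical stand-in for translate_date_to_words(): spell the date out in English.
    return d.strftime("%B %d %Y")

def gematria_sum(text: str) -> int:
    # Hypothetical stand-in for calculate_gematria_sum(): English ordinal values (a=1 ... z=26).
    return sum(ord(c) - ord('a') + 1 for c in text.lower() if 'a' <= c <= 'z')

def steps_per_day(name: str, start: date, end: date) -> dict:
    # One step per day in [start, end], keyed by ISO date string,
    # mirroring the while-loop added to perform_search in this commit.
    steps = {}
    current = start
    while current <= end:
        steps[current.strftime("%Y-%m-%d")] = gematria_sum(name + " " + words_for_date(current))
        current += timedelta(days=1)
    return steps

print(steps_per_day("Hans Albert Einstein", date(1904, 5, 14), date(1904, 5, 16)))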