import logging
import os
import subprocess

try:
    result = subprocess.run(['bash', 'install.sh'], check=True)
except subprocess.CalledProcessError as e:
    print(f"The install script failed with return code {e.returncode}")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

import gradio as gr
import torah
import bible
import quran
import hindu
import tripitaka
from utils import number_to_ordinal_word, custom_normalize, date_to_words, translate_date_to_words
from gematria import calculate_gematria, strip_diacritics
import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar
from datetime import datetime, timedelta
import math
import json
import re
import sqlite3
from collections import defaultdict, Counter
from typing import List, Tuple
# import rich  # Removed rich
# from fuzzywuzzy import fuzz  # Removed fuzzywuzzy
import calendar
import translation_utils
import hashlib
import copy

translation_utils.create_translation_table()

# Create a translator instance *once* globally
translator = GoogleTranslator(source='auto', target='auto')
LANGUAGES_SUPPORTED = translator.get_supported_languages(as_dict=True)
LANGUAGE_CODE_MAP = LANGUAGES_SUPPORTED  # Use deep_translator's mapping directly

# --- Constants ---
DATABASE_FILE = 'gematria.db'
MAX_PHRASE_LENGTH_LIMIT = 20
ELS_CACHE_DB = "els_cache.db"
DATABASE_TIMEOUT = 60
# --- Database Initialization ---
def initialize_database():
    global conn
    conn = sqlite3.connect(DATABASE_FILE)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
    ''')
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
    ''')
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')
    conn.commit()


# --- Initialize Database ---
initialize_database()
# --- ELS Cache Functions ---
def create_els_cache_table():
    with sqlite3.connect(ELS_CACHE_DB) as conn:
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS els_cache (
                    query_hash TEXT PRIMARY KEY,
                    function_name TEXT,
                    args TEXT,
                    kwargs TEXT,
                    results TEXT
                )
            ''')
        except sqlite3.OperationalError as e:
            logger.error(f"Error creating table: {e}")


def get_query_hash(func, args, kwargs):
    key = (func.__name__, args, kwargs)
    return hashlib.sha256(json.dumps(key).encode()).hexdigest()
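

# Illustrative sketch of the hashing contract (values are hypothetical):
#   params_json = json.dumps({"function": "torah.process_json_files", "step": 5})
#   h = get_query_hash(torah.process_json_files, params_json, "")
#   len(h) == 64  # SHA-256 hex digest; identical inputs always hash identically
# json.dumps is only deterministic if key order is stable, so callers should
# build the params dict in a consistent order (as cached_process_json_files does below).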
def cached_process_json_files(func, *args, **kwargs):
    # Create a dictionary to store the parameters
    params = {
        "function": f"{func.__module__}.{func.__name__}"
    }

    # Add the positional arguments with their names
    arg_names = func.__code__.co_varnames[:func.__code__.co_argcount]
    for name, value in zip(arg_names, args):
        params[name] = value

    # Add the keyword arguments
    for name, value in kwargs.items():
        params[name] = value

    # Convert the parameters to a JSON string
    params_json = json.dumps(params)

    # Use the parameters JSON string to generate the query hash
    query_hash = get_query_hash(func, params_json, "")

    # Ensure the table exists before any operations
    create_els_cache_table()

    try:
        with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT results FROM els_cache WHERE query_hash = ?", (query_hash,))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for query: {query_hash}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")

    logger.info(f"Cache miss for query: {query_hash}")
    results = func(*args, **kwargs)

    try:
        with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "INSERT INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                (query_hash, params["function"], params_json, json.dumps({}), json.dumps(results)))
            conn.commit()
    except sqlite3.Error as e:
        logger.error(f"Database error caching results: {e}")

    return results
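

# Usage sketch (assumes torah.process_json_files accepts the argument list used
# in perform_els_search further down; results must be JSON-serializable):
#   results = cached_process_json_files(
#       torah.process_json_files, 1, 39, 5, "1,-1", 0, "en", True, True, True)
# The first call runs the search and stores the serialized results under the
# query hash; repeated calls with identical arguments are served from els_cache.db.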
# --- Helper Functions (from Network app.py) ---
def flatten_text(text: List) -> str:
    if isinstance(text, list):
        return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
    return text


def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
    with sqlite3.connect(DATABASE_FILE) as conn:
        cursor = conn.cursor()
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND phrase_length <= ?
        ''', (gematria_sum, max_words))
        results = cursor.fetchall()
        return results


def get_most_frequent_phrase(results):
    phrase_counts = defaultdict(int)
    for words, book, chapter, verse, phrase_length, word_position in results:
        phrase_counts[words] += 1
    most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None
    return most_frequent_phrase
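

# Example with illustrative rows: given three DB hits for one gematria sum,
#   rows = [("בראשית ברא", "Genesis", 1, 1, 2, "1-2"),
#           ("בראשית ברא", "Genesis", 1, 2, 2, "3-4"),
#           ("טקסט אחר", "Exodus", 2, 3, 2, "4-5")]
# get_most_frequent_phrase(rows) returns "בראשית ברא" (two occurrences beat one).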
# --- Functions from BOS app.py ---
def create_language_dropdown(label, default_value='English', show_label=True):
    return gr.Dropdown(
        choices=list(LANGUAGE_CODE_MAP.keys()),
        label=label,
        value=default_value,
        show_label=show_label
    )


def calculate_gematria_sum(text, date_words):
    if text or date_words:
        combined_input = f"{text} {date_words}"
        logger.info(f"Searching for input: {combined_input}")
        numbers = re.findall(r'\d+', combined_input)
        text_without_numbers = re.sub(r'\d+', '', combined_input)
        number_sum = sum(int(number) for number in numbers)
        text_gematria = calculate_gematria(strip_diacritics(text_without_numbers))
        total_sum = text_gematria + number_sum
        return total_sum
    else:
        return None
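

# Worked example (gematria values are hypothetical): literal digits are summed
# arithmetically while the remaining text is stripped of diacritics and valued
# by calculate_gematria, so
#   calculate_gematria_sum("אדם 10", "") == calculate_gematria("אדם") + 10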
def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                       include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
    if step == 0 or rounds_combination == "0,0":
        return None

    results = {}
    length = 0

    selected_language_long = tlang
    tlang = LANGUAGES_SUPPORTED.get(selected_language_long)
    if tlang is None:
        tlang = "en"
        logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")

    if include_torah:
        logger.debug(
            f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
        results["Torah"] = cached_process_json_files(torah.process_json_files, 1, 39, step, rounds_combination,
                                                     length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
    else:
        results["Torah"] = []

    if include_bible:
        results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination,
                                                     length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
    else:
        results["Bible"] = []

    if include_quran:
        results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination,
                                                     length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
    else:
        results["Quran"] = []

    if include_hindu:
        results["Rig Veda"] = cached_process_json_files(hindu.process_json_files, 1, 10, step, rounds_combination,
                                                        length, tlang, False, strip_in_braces, strip_diacritics_chk)
    else:
        results["Rig Veda"] = []

    if include_tripitaka:
        results["Tripitaka"] = cached_process_json_files(tripitaka.process_json_files, 1, 52, step, rounds_combination,
                                                         length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
    else:
        results["Tripitaka"] = []

    return results
def add_24h_projection(results_dict):
    for book_name, results in results_dict.items():
        num_results = len(results)
        if num_results > 0:
            time_interval = timedelta(minutes=24 * 60 / num_results)
            current_time = datetime.min.time()
            for i in range(num_results):
                next_time = (datetime.combine(datetime.min, current_time) + time_interval).time()
                time_range_str = f"{current_time.strftime('%H:%M')}-{next_time.strftime('%H:%M')}"
                results[i]['24h Projection'] = time_range_str
                current_time = next_time
    return results_dict
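

# Interval arithmetic sketch: with 4 results for a book, the day is split into
# 24 * 60 / 4 = 360-minute slots, so the entries are labelled
#   00:00-06:00, 06:00-12:00, 12:00-18:00, 18:00-00:00
# (the final slot reads "-00:00" because adding the interval rolls the combined
# datetime over into the next day before .time() is taken).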
def add_monthly_projection(results_dict, selected_date):
    if selected_date is None:
        return results_dict

    for book_name, results in results_dict.items():
        num_results = len(results)
        if num_results > 0:
            days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
            total_seconds = (days_in_month - 1) * 24 * 3600
            seconds_interval = total_seconds / num_results
            start_datetime = datetime(selected_date.year, selected_date.month, 1)
            current_datetime = start_datetime
            for i in range(num_results):
                next_datetime = current_datetime + timedelta(seconds=seconds_interval)
                current_date = current_datetime.date()
                next_date = next_datetime.date()
                date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
                results[i]['Monthly Projection'] = date_range_str
                current_datetime = next_datetime
    return results_dict
def add_yearly_projection(results_dict, selected_date):
    if selected_date is None:
        return results_dict

    for book_name, results in results_dict.items():
        num_results = len(results)
        if num_results > 0:
            days_in_year = 366 if calendar.isleap(selected_date.year) else 365
            total_seconds = (days_in_year - 1) * 24 * 3600
            seconds_interval = total_seconds / num_results
            start_datetime = datetime(selected_date.year, 1, 1)
            current_datetime = start_datetime
            for i in range(num_results):
                next_datetime = current_datetime + timedelta(seconds=seconds_interval)
                current_date = current_datetime.date()
                next_date = next_datetime.date()
                date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
                results[i]['Yearly Projection'] = date_range_str
                current_datetime = next_datetime
    return results_dict


def sort_results(results):
    def parse_time(time_str):
        try:
            hours, minutes = map(int, time_str.split(':'))
            return hours * 60 + minutes
        except ValueError:
            return 24 * 60

    # Default to a full "23:59-23:59" range so entries without a projection
    # still split into a start and an end instead of raising IndexError.
    return sorted(results, key=lambda x: (
        parse_time(x.get('24h Projection', '23:59-23:59').split('-')[0]),
        parse_time(x.get('24h Projection', '23:59-23:59').split('-')[1])
    ))
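

# Sorting sketch: entries are ordered by the start (then end) of their
# '24h Projection' range, e.g.
#   [{'24h Projection': '12:00-18:00'}, {'24h Projection': '00:00-06:00'}]
# sorts the 00:00-06:00 entry first; entries without a parseable projection
# fall back to 23:59-23:59 and therefore sort to the end.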
def extract_rounds_combinations():
    """Extracts unique rounds combinations from the database."""
    combinations = set()
    try:
        with sqlite3.connect(ELS_CACHE_DB) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT args FROM els_cache")
            all_args = cursor.fetchall()
            for args_tuple in all_args:
                args_str = args_tuple[0]
                try:
                    args_json = json.loads(args_str)
                    if 'rounds' in args_json:
                        combinations.add(args_json['rounds'])
                except json.JSONDecodeError:
                    logger.error(f"Could not decode JSON for args: {args_str}")
    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")

    logger.info(f"Found unique rounds combinations: {combinations}")
    return ["All"] + sorted(list(combinations))


def update_rounds_dropdown():
    new_choices = extract_rounds_combinations()
    return new_choices
def perform_gematria_calculation_for_date_range(start_date, end_date):
    logger.debug(f"Calculating date gematria for range: {start_date} - {end_date}")
    results = {}
    delta = timedelta(days=1)
    current_date = start_date
    while current_date <= end_date:
        date_string = current_date.strftime("%Y-%m-%d")
        date_words = date_to_words(date_string)
        date_gematria = calculate_gematria_sum(date_words, "")  # Adjusted to match the calculate_gematria_sum signature
        results[date_string] = {
            "date_words": date_words,
            "date_gematria": date_gematria,
        }
        current_date += delta
    logger.debug("Finished calculating date gematria.")
    return results
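

# Output-shape sketch for a two-day range (wording and sums are hypothetical;
# the exact phrasing comes from date_to_words, only the dict shape is fixed):
#   {
#     "2024-01-01": {"date_words": "January first, twenty twenty-four", "date_gematria": 1234},
#     "2024-01-02": {"date_words": "January second, twenty twenty-four", "date_gematria": 1250},
#   }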
def find_matching_dates(date_gematrias, names, search_journal_sum):
    logger.debug(f"Searching for matches with journal sum: {search_journal_sum}")
    matching_dates = {}
    for name in names:
        name_gematria = calculate_gematria_sum(name, "")  # Adjusted to match the calculate_gematria_sum signature
        target_date_gematria = search_journal_sum - name_gematria if name_gematria is not None else None
        logger.debug(f"Name: {name}, Gematria: {name_gematria}, Target Date Gematria: {target_date_gematria}")
        if target_date_gematria is not None:
            for date_str, date_data in date_gematrias.items():
                if date_data["date_gematria"] == target_date_gematria:
                    if name not in matching_dates:
                        matching_dates[name] = []
                    matching_dates[name].append(date_str)
        logger.debug(f"Matches for {name}: {matching_dates.get(name, [])}")
    return matching_dates
def find_shared_journal_sums(date_gematrias, names):
    """Finds shared journal sums and formats output with names and dates together."""
    logger.debug("Calculating shared journal sums...")
    shared_sums = {}
    name_gematrias = {name: calculate_gematria_sum(name, "") for name in names}
    for date_str, date_data in date_gematrias.items():
        date_gematria = date_data["date_gematria"]
        for name, name_gematria in name_gematrias.items():
            journal_sum = date_gematria + name_gematria
            journal_sum_str = str(journal_sum)  # Convert the key (journal_sum) to a string
            if journal_sum_str not in shared_sums:
                shared_sums[journal_sum_str] = {}
            if name not in shared_sums[journal_sum_str]:
                shared_sums[journal_sum_str][name] = []
            shared_sums[journal_sum_str][name].append(date_str)

    # Filter out sums not shared by at least two names and format output
    result = {}
    for journal_sum_str, data in shared_sums.items():
        if len(data) >= 2:
            result[journal_sum_str] = {}
            for name, dates in data.items():
                result[journal_sum_str][name] = dates

    logger.debug(f"Shared Journal Sums: {result}")
    return result
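

# Shape sketch (hypothetical sums): if "Alice" and "Bob" both reach journal
# sum 1500 on some dates in the range, the result looks like
#   {"1500": {"Alice": ["2024-01-03"], "Bob": ["2024-01-07", "2024-01-21"]}}
# Sums reached by only a single name are filtered out above.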
def calculate_and_find_dates(start_date, end_date, names_input, search_journal_sum, find_shared=False):
    names = [n.strip() for n in names_input.split("\n") if n.strip()]
    date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)
    if find_shared:
        shared_sums = find_shared_journal_sums(date_gematrias, names)
        return None, shared_sums
    else:
        matching_dates = find_matching_dates(date_gematrias, names, int(search_journal_sum))
        return matching_dates, None
# --- Main Gradio App ---
with gr.Blocks() as app:
    with gr.Tab("ELS Search"):
        with gr.Column():
            with gr.Row():
                tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
                selected_date = Calendar(type="datetime", label="Date to investigate (optional)",
                                         info="Pick a date from the calendar")
                use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
                use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
                use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
                date_language_input = create_language_dropdown(
                    "Language of the person/topic (optional) (Date Word Language)", default_value='english')
            with gr.Row():
                gematria_text = gr.Textbox(label="Name and/or Topic (required)",
                                           value="Hans Albert Einstein Mileva Marity-Einstein")
                date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
                gematria_result = gr.Number(label="Journal Sum")
            with gr.Row():
                step = gr.Number(label="Jump Width (Steps) for ELS")
                float_step = gr.Number(visible=False, value=1)
                half_step_btn = gr.Button("Steps / 2")
                double_step_btn = gr.Button("Steps * 2")
            with gr.Column():
                round_x = gr.Number(label="Round (1)", value=1)
                round_y = gr.Number(label="Round (2)", value=-1)
                rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-0.5,0.5,-1")
            with gr.Row():
                include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
                include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
                include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
                include_hindu_chk = gr.Checkbox(label="Include Rigveda", value=False)
                include_tripitaka_chk = gr.Checkbox(label="Include Tripitaka", value=False)
                strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
                strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
                strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
            translate_btn = gr.Button("Search with ELS")

            # --- Output Components ---
            markdown_output = gr.Dataframe(label="ELS Results")
            most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
            json_output = gr.JSON(label="JSON Output")
with gr.Tab("Cache Database Search"): | |
with gr.Column(): | |
with gr.Row(): | |
main_book_filter = gr.Dropdown(label="Filter by Main Book", | |
choices=["All", "Torah", "Bible", "Quran", "Rig Veda", "Tripitaka"], | |
value="Torah") | |
# Keine choices hier, nur das Label und den Initialwert | |
rounds_filter = gr.Dropdown(label="Filter by Rounds", allow_custom_value=True, value="1,-1") | |
with gr.Row(): | |
search_type = gr.Radio(label="Search by", | |
choices=["Text in result_text", "Gematria Sum in results"], | |
value="Text in result_text") | |
with gr.Row(): | |
search_mode = gr.Radio(label="Search Mode", | |
choices=["Exact Search", "Contains Word"], | |
value="Contains Word") | |
with gr.Row(): | |
search_term = gr.Textbox(label="Search Term", visible=True) | |
gematria_sum_search = gr.Number(label="Gematria Sum", visible=False) | |
with gr.Row(): | |
search_db_btn = gr.Button("Search Cache Database") | |
with gr.Row(): | |
cache_search_results = gr.JSON(label="Cache Search Results") | |
def update_search_components(search_type): | |
if search_type == "Text in result_text": | |
return gr.Textbox.update(visible=True), gr.Number.update(visible=False) | |
else: | |
return gr.Textbox.update(visible=False), gr.Number.update(visible=True) | |
        def search_cache_database(search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter, search_mode):
            """Searches the cache database based on the selected filters and search term."""
            results = []
            search_term = strip_diacritics(search_term)
            if main_book_filter == "All" and rounds_filter == "All" and not search_term and not gematria_sum_search:
                return results

            # Maps the dropdown book names to the cached function names
            book_function_map = {
                "Torah": "torah.process_json_files",
                "Bible": "bible.process_json_files",
                "Quran": "quran.process_json_files",
                "Rig Veda": "hindu.process_json_files",
                "Tripitaka": "tripitaka.process_json_files",
            }

            try:
                with sqlite3.connect(ELS_CACHE_DB) as conn:
                    cursor = conn.cursor()

                    # Optimization: if only main_book_filter is selected, don't perform a full search
                    if search_type == "Text in result_text":
                        if main_book_filter != "All" and rounds_filter == "All" and not search_term:
                            return results
                    else:  # "Gematria Sum in results"
                        if main_book_filter != "All" and rounds_filter == "All" and not gematria_sum_search:
                            return results
                        if not isinstance(gematria_sum_search, (int, float)):
                            return results

                    cursor.execute("SELECT * FROM els_cache")
                    all_results = cursor.fetchall()
                    columns = [desc[0] for desc in cursor.description]

                    for row in all_results:
                        row_dict = dict(zip(columns, row))
                        args_dict = json.loads(row_dict['args'])
                        function_name = row_dict['function_name']

                        # Function name filtering
                        if main_book_filter != "All" and function_name != book_function_map.get(main_book_filter):
                            continue

                        # Rounds filtering
                        if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
                            continue

                        try:
                            results_json = json.loads(row_dict['results'])
                            for result_entry in results_json:
                                if search_type == "Text in result_text":
                                    if 'result_text' not in result_entry:
                                        continue
                                    matched = (search_mode == "Exact Search" and search_term == result_entry['result_text']) or \
                                              (search_mode == "Contains Word" and search_term in result_entry['result_text'])
                                else:  # "Gematria Sum in results"
                                    matched = 'result_sum' in result_entry and result_entry['result_sum'] == gematria_sum_search
                                if matched:
                                    results.append({
                                        'function_name': function_name,
                                        'step': args_dict.get('step'),
                                        'rounds': args_dict.get('rounds'),
                                        'result': result_entry
                                    })
                        except (json.JSONDecodeError, TypeError) as e:
                            logger.error(f"Error processing row: {e}")
                            continue

                    # Sort results by gematria sum
                    results.sort(
                        key=lambda x: x['result']['result_sum'] if 'result' in x and 'result_sum' in x['result'] else 0)
                    return results

            except sqlite3.Error as e:
                logger.error(f"Database error: {e}")
                return []
with gr.Tab("Date Range Synchronicity + Journal-sum Search"): | |
with gr.Row(): | |
start_date_jr = Calendar(type="datetime", label="Start Date") | |
end_date_jr = Calendar(type="datetime", label="End Date") | |
with gr.Row(): | |
names_input_jr = gr.Textbox(label="Names (one per line)", lines=5) | |
search_sum_jr = gr.Number(label="Search Journal Sum", precision=0) | |
with gr.Row(): | |
calculate_btn_jr = gr.Button("Search Journal Sum") | |
shared_sums_btn_jr = gr.Button("Find Shared Journal Sums") | |
matching_dates_output_jr = gr.JSON(label="Matching Dates") | |
shared_sums_output_jr = gr.JSON(label="Shared Journal Sums") | |
calculate_btn_jr.click( | |
lambda start_date, end_date, names_input, search_sum: calculate_and_find_dates( | |
start_date, end_date, names_input, search_sum, find_shared=False), | |
inputs=[start_date_jr, end_date_jr, names_input_jr, search_sum_jr], | |
outputs=[matching_dates_output_jr, shared_sums_output_jr] | |
) | |
shared_sums_btn_jr.click( | |
lambda start_date, end_date, names_input: calculate_and_find_dates( | |
start_date, end_date, names_input, 0, find_shared=True), | |
inputs=[start_date_jr, end_date_jr, names_input_jr], | |
outputs=[matching_dates_output_jr, shared_sums_output_jr] | |
) | |
with gr.Tab("Date Range ELS Journal Search"): | |
with gr.Row(): | |
start_date_els = Calendar(type="datetime", label="Start Date") | |
end_date_els = Calendar(type="datetime", label="End Date") | |
with gr.Row(): | |
names_input_els = gr.Textbox(label="Names (one per line)", lines=5) | |
with gr.Row(): | |
search_type_els = gr.Radio( | |
label="Search by", | |
choices=["Text in result_text", "Gematria Sum in results"], | |
value="Text in result_text" | |
) | |
with gr.Row(): | |
search_mode_els = gr.Radio( | |
label="Search Mode", | |
choices=["Exact Search", "Contains Word"], | |
value="Contains Word" | |
) | |
with gr.Row(): | |
search_term_els = gr.Textbox(label="Search Term", visible=True) | |
gematria_sum_search_els = gr.Number(label="Gematria Sum", visible=False) | |
with gr.Row(): | |
include_torah_chk_els = gr.Checkbox(label="Include Torah", value=True) | |
include_bible_chk_els = gr.Checkbox(label="Include Bible", value=True) | |
include_quran_chk_els = gr.Checkbox(label="Include Quran", value=True) | |
include_hindu_chk_els = gr.Checkbox(label="Include Rigveda", value=False) | |
include_tripitaka_chk_els = gr.Checkbox(label="Include Tripitaka", value=False) | |
with gr.Row(): | |
translate_results_chk_els = gr.Checkbox(label="Translate Results to English", value=False) | |
with gr.Row(): | |
sub_oscillation_search_chk_els = gr.Checkbox(label="Search in Sub-Oscillations", value=False) # Neue Checkbox | |
sub_oscillation_level_els = gr.Number(label="Sub-Oscillation Level (0 = off)", precision=0, value=1) | |
with gr.Row(): | |
perform_search_btn_els = gr.Button("Perform Search") | |
filtered_results_output_els = gr.JSON(label="Filtered Results") | |
# Funktionen zur Aktualisierung der Sichtbarkeit der Sucheingabefelder | |
def update_search_components_els(search_type): | |
if search_type == "Text in result_text": | |
return gr.Textbox(visible=True), gr.Number(visible=False) | |
else: | |
return gr.Textbox(visible=False), gr.Number(visible=True) | |
search_type_els.change( | |
fn=update_search_components_els, | |
inputs=[search_type_els], | |
outputs=[search_term_els, gematria_sum_search_els] | |
) | |
        def perform_els_search_for_gematria_sum(
                gematria_sum,
                include_torah,
                include_bible,
                include_quran,
                include_hindu,
                include_tripitaka
        ):
            """
            Calls the actual ELS search function, returning a dict like:
                {
                    "Torah": [{"result_sum": ..., "result_text": ..., "source_language": ...}, ...],
                    "Bible": [...],
                    ...
                }
            """
            return perform_els_search(
                step=gematria_sum,
                rounds_combination="1,-0.5,0.5,-1",
                tlang="english",
                strip_spaces=True,
                strip_in_braces=True,
                strip_diacritics_chk=True,
                include_torah=include_torah,
                include_bible=include_bible,
                include_quran=include_quran,
                include_hindu=include_hindu,
                include_tripitaka=include_tripitaka
            )
        def perform_sub_oscillation_search(
                base_results_list,
                initial_gematria_sum,
                level,
                include_torah,
                include_bible,
                include_quran,
                include_hindu,
                include_tripitaka
        ):
            """
            Recursively populates 'subresults' for each base entry, up to the specified level.

            base_results_list: a list of dicts, each shaped like:
                {
                    "book": "Torah",
                    "result": {
                        "result_sum": ...,
                        "result_text": ...
                    },
                    "subresults": []
                }
            initial_gematria_sum: the base sum from day+name (or from prior recursion).
            level: how many sub-levels we still want to descend.
            include_*: booleans controlling which texts to include.
            """
            if level <= 0:
                return base_results_list

            for base_entry in base_results_list:
                parent_sum = base_entry["result"]["result_sum"]
                combined_sum = initial_gematria_sum + parent_sum

                # Next-level search
                sub_search_results = perform_els_search_for_gematria_sum(
                    gematria_sum=combined_sum,
                    include_torah=include_torah,
                    include_bible=include_bible,
                    include_quran=include_quran,
                    include_hindu=include_hindu,
                    include_tripitaka=include_tripitaka
                )

                # Build child entries
                child_entries = []
                for book_name, res_list in sub_search_results.items():
                    for one_res in res_list:
                        child_entries.append({
                            "book": book_name,
                            "result": one_res,
                            "subresults": []
                        })

                # Attach to subresults
                base_entry["subresults"].extend(child_entries)

                # Recurse deeper
                perform_sub_oscillation_search(
                    base_entry["subresults"],
                    initial_gematria_sum,
                    level - 1,
                    include_torah,
                    include_bible,
                    include_quran,
                    include_hindu,
                    include_tripitaka
                )
            return base_results_list
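
        # Growth note with a small, hypothetical example: each level re-runs a
        # full ELS search per entry, so the tree grows roughly as
        #   base_hits * (hits_per_search ** level)
        # e.g. 2 base hits and ~10 hits per search give ~20 children at level 1
        # and ~200 grandchildren at level 2. Deep levels are therefore costly;
        # the cache in cached_process_json_files keeps repeated sums affordable.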
        def matches_criteria(item, search_type, search_term, gematria_sum_search, search_mode):
            """
            Checks if a single item (a dict with 'result_text', 'result_sum', etc.)
            matches the user's chosen filter criteria.

            item shape example:
                {
                    "book": "Torah",
                    "result": {
                        "result_text": "foo bar",
                        "result_sum": 1234,
                        ...
                    },
                    "subresults": [...]
                }
            """
            result_text = item["result"].get("result_text", "")
            result_sum = item["result"].get("result_sum", None)

            if search_type == "Text in result_text":
                if search_mode == "Exact Search":
                    return result_text == search_term
                else:  # "Contains Word"
                    return search_term in result_text
            else:  # search_type == "Gematria Sum in results"
                return result_sum == gematria_sum_search
        def prune_tree_by_search_criteria(results_list, search_type, search_term, gematria_sum_search, search_mode):
            """
            Recursively filters a list of items so that an item is included if it
            directly matches the search criteria, OR if any of its child subresults
            match (in which case we keep the item *and* the matching children).
            This preserves the parent chain for matching subresults.

            Returns a new, pruned list of items with the same nested structure,
            but only containing the branches that match or lead to a match.
            """
            pruned_list = []
            for item in results_list:
                # Recurse into subresults first
                pruned_sub = prune_tree_by_search_criteria(
                    item["subresults"],
                    search_type,
                    search_term,
                    gematria_sum_search,
                    search_mode
                )
                does_item_match = matches_criteria(item, search_type, search_term, gematria_sum_search, search_mode)

                # If the item itself matches OR any child matches
                if does_item_match or len(pruned_sub) > 0:
                    # Make a copy of the item so we don't destroy the original
                    new_item = copy.deepcopy(item)
                    # Overwrite subresults with the pruned version
                    new_item["subresults"] = pruned_sub
                    pruned_list.append(new_item)
            return pruned_list
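
        # Pruning sketch: given a tree like
        #   parent (no match)
        #     └─ child (matches "light")
        # the parent is kept because pruned_sub is non-empty, while its other,
        # non-matching children are dropped; the deep copy means the unfiltered
        # tree stays intact for other consumers.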
        def translate_subresults_recursive(subresults_list):
            """
            Recursively translates the 'result_text' of each item in subresults_list.
            Only items that remain in the filtered results are visited.
            """
            for sub_item in subresults_list:
                text = sub_item["result"].get("result_text", "")
                source_lang = sub_item["result"].get("source_language", "auto")
                if text:
                    translated_text = translation_utils.get_translation(text, "en", source_lang)[0]
                    sub_item["result"]["translated_text"] = translated_text
                # Recurse into deeper subresults
                if sub_item["subresults"]:
                    translate_subresults_recursive(sub_item["subresults"])
        def perform_date_range_els_search(
                start_date,
                end_date,
                names_input,
                search_type,
                search_term,
                gematria_sum_search,
                search_mode,
                include_torah,
                include_bible,
                include_quran,
                include_hindu,
                include_tripitaka,
                translate_results,
                sub_oscillation_search,
                sub_oscillation_level
        ):
            """
            - Builds a fully nested sub-oscillation structure if requested.
            - Then prunes it so only matching items remain (plus their parents).
            - Finally, optionally translates the pruned items.
            - Returns the filtered (and optionally translated) structure.
            """
            # 1) Compute gematria sums for all dates in the chosen range
            names = [n.strip() for n in names_input.split("\n") if n.strip()]
            date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)
            all_filtered_results = []

            # 2) Loop over each date
            for date_str, date_data in date_gematrias.items():
                for name in names:
                    # Base sum: day gematria + name gematria
                    name_gem = calculate_gematria_sum(name, "")
                    initial_gem = date_data["date_gematria"] + name_gem
                    date_words = date_data["date_words"]
                    search_path_basic = f"{name} {date_words}"

                    # 3) Do the base ELS search (these are the top-level results)
                    base_results = perform_els_search_for_gematria_sum(
                        gematria_sum=initial_gem,
                        include_torah=include_torah,
                        include_bible=include_bible,
                        include_quran=include_quran,
                        include_hindu=include_hindu,
                        include_tripitaka=include_tripitaka
                    )

                    # 4) Convert "base_results" into a uniform nested format,
                    #    shaped like { "0": {"book": ..., "results": [{...}, ...]}, ... }
                    formatted_base_results = {}
                    idx_counter = 0
                    for book_name, book_res_list in base_results.items():
                        if book_res_list:
                            formatted_base_results[str(idx_counter)] = {
                                "book": book_name,
                                "results": []
                            }
                            for res in book_res_list:
                                formatted_base_results[str(idx_counter)]["results"].append({
                                    "book": book_name,
                                    "result": res,
                                    "subresults": []
                                })
                            idx_counter += 1

                    # 5) Flatten out for sub-oscillation recursion
                    base_results_list = []
                    for val in formatted_base_results.values():
                        base_results_list.extend(val["results"])

                    # 6) If sub-oscillation is requested, build deeper levels
                    if sub_oscillation_search and sub_oscillation_level > 0 and base_results_list:
                        perform_sub_oscillation_search(
                            base_results_list,
                            initial_gem,
                            sub_oscillation_level,
                            include_torah,
                            include_bible,
                            include_quran,
                            include_hindu,
                            include_tripitaka
                        )

                    # 7) Prune (filter) the results according to the user's search
                    #    criteria. For each "book" block, filter the "results" array.
                    pruned_base_results = {}
                    for key, block in formatted_base_results.items():
                        original_list = block["results"]  # each is {book, result, subresults}
                        filtered_list = prune_tree_by_search_criteria(
                            original_list,
                            search_type,
                            search_term,
                            gematria_sum_search,
                            search_mode
                        )
                        # Keep only if we got something back
                        if filtered_list:
                            pruned_base_results[key] = {
                                "book": block["book"],
                                "results": filtered_list
                            }

                    # If there's nothing left after filtering, skip this date+name
                    if not pruned_base_results:
                        continue

                    # 8) Build the final item for this date+name, with pruned results
                    final_item = {
                        "date": date_str,
                        "name": name,
                        "search_path_basic": search_path_basic,
                        "basic_gematria": initial_gem,
                        "basic_results": pruned_base_results
                    }

                    # 9) If translation is requested, only translate the pruned results
                    if translate_results:
                        for block_key, block_val in final_item["basic_results"].items():
                            for entry in block_val["results"]:
                                txt = entry["result"].get("result_text", "")
                                src_lang = entry["result"].get("source_language", "auto")
                                if txt:
                                    trans = translation_utils.get_translation(txt, "en", src_lang)[0]
                                    entry["result"]["translated_text"] = trans
                                # And go deeper into subresults
                                if entry["subresults"]:
                                    translate_subresults_recursive(entry["subresults"])

                    # 10) Add this final, fully filtered (and possibly translated) item
                    all_filtered_results.append(final_item)

            # Return the fully processed set of results
            return all_filtered_results
        def remove_duplicates(dict_list):
            """
            Removes duplicate dicts from a list; optional, only needed if the
            data actually contains duplicates.
            """
            seen = set()
            unique_list = []
            for item in dict_list:
                item_id = repr(item)
                if item_id not in seen:
                    seen.add(item_id)
                    unique_list.append(item)
            return unique_list
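
        # Caveat sketch: repr()-based identity treats dicts with the same keys
        # in a different insertion order as distinct, e.g.
        #   remove_duplicates([{"a": 1, "b": 2}, {"b": 2, "a": 1}])  # keeps both
        # For the result dicts built above this is acceptable, since they are
        # always constructed with a fixed key order.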
        perform_search_btn_els.click(
            perform_date_range_els_search,
            inputs=[
                start_date_els,
                end_date_els,
                names_input_els,
                search_type_els,
                search_term_els,
                gematria_sum_search_els,
                search_mode_els,
                include_torah_chk_els,
                include_bible_chk_els,
                include_quran_chk_els,
                include_hindu_chk_els,
                include_tripitaka_chk_els,
                translate_results_chk_els,
                sub_oscillation_search_chk_els,
                sub_oscillation_level_els
            ],
            outputs=[filtered_results_output_els]
        )
with gr.Tab("ELS Journal Gematria-sum Synchronicity search"): | |
with gr.Row(): | |
start_date_comp = Calendar(type="datetime", label="Start Date") | |
end_date_comp = Calendar(type="datetime", label="End Date") | |
with gr.Row(): | |
names_input_comp = gr.Textbox(label="Names (one per line)", lines=5) | |
with gr.Row(): | |
sub_oscillation_level_comp = gr.Number(label="Sub-Oscillation Level", precision=0, value=0) | |
with gr.Row(): | |
include_torah_chk_comp = gr.Checkbox(label="Include Torah", value=True) | |
include_bible_chk_comp = gr.Checkbox(label="Include Bible", value=True) | |
include_quran_chk_comp = gr.Checkbox(label="Include Quran", value=True) | |
include_hindu_chk_comp = gr.Checkbox(label="Include Rigveda", value=False) | |
include_tripitaka_chk_comp = gr.Checkbox(label="Include Tripitaka", value=False) | |
with gr.Row(): | |
translate_results_chk_comp = gr.Checkbox(label="Translate Results to English", value=False) | |
with gr.Row(): | |
common_gematria_results = gr.JSON(label="Common Gematria Results") | |
with gr.Row(): | |
perform_search_btn_comp = gr.Button("Perform Search") | |
        def perform_comprehensive_els_search(
                start_date,
                end_date,
                names_input,
                sub_oscillation_level,
                include_torah,
                include_bible,
                include_quran,
                include_hindu,
                include_tripitaka,
                translate_results
        ):
            names = [n.strip() for n in names_input.split("\n") if n.strip()]
            date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)
            all_results_by_date = {}

            for date_str, date_data in date_gematrias.items():
                all_results_by_date[date_str] = {}
                for name in names:
                    name_gem = calculate_gematria_sum(name, "")
                    initial_gem = date_data["date_gematria"] + name_gem
                    base_results_list = []
                    base_results = perform_els_search_for_gematria_sum(
                        gematria_sum=initial_gem,
                        include_torah=include_torah,
                        include_bible=include_bible,
                        include_quran=include_quran,
                        include_hindu=include_hindu,
                        include_tripitaka=include_tripitaka
                    )
                    for book_name, res_list in base_results.items():
                        for one_res in res_list:
                            if not isinstance(one_res, dict) or "result_sum" not in one_res:
                                logger.warning(f"Invalid result format for {name} on {date_str}: {one_res}")
                                continue
                            base_results_list.append({
                                "book": book_name,
                                "result": one_res,
                                "subresults": []
                            })
                    if sub_oscillation_level > 0 and base_results_list:
                        perform_sub_oscillation_search(
                            base_results_list,
                            initial_gem,
                            sub_oscillation_level,
                            include_torah,
                            include_bible,
                            include_quran,
                            include_hindu,
                            include_tripitaka
                        )
                    all_results_by_date[date_str][name] = base_results_list

            common_results = {}
            for date_str, name_results in all_results_by_date.items():
                common_results[date_str] = {}
                sums_by_name = {}
                all_sums_count = Counter()
                for name, results in name_results.items():
                    sums_by_name[name] = set()
                    count_sums_recursive(results, all_sums_count)
                    extract_sums_recursive(results, sums_by_name[name])

                # Keep only sums reached by *every* name, ordered by overall frequency
                ordered_common_sums = [item for item, count in all_sums_count.most_common()
                                       if all(item in sums_by_name[n] for n in sums_by_name)]

                for common_sum in ordered_common_sums:
                    common_results[date_str][str(common_sum)] = {}
                    for name, results in name_results.items():
                        matching_items = []
                        for item in results:
                            if find_results_by_sum([item], common_sum):
                                item_copy = copy.deepcopy(item)
                                if translate_results:
                                    translate_recursive(item_copy)
                                item_copy["subresults"] = find_results_by_sum(item.get("subresults", []), common_sum)
                                if translate_results:
                                    for sub_item in item_copy["subresults"]:
                                        translate_recursive(sub_item)
                                matching_items.append(item_copy)
                        if matching_items:
                            common_results[date_str][str(common_sum)][name] = matching_items
            return common_results
        def find_results_by_sum(results, target_sum):
            matches = []
            if isinstance(results, list):
                for item in results:
                    matches.extend(find_results_by_sum(item, target_sum))
            elif isinstance(results, dict):
                if "result" in results and isinstance(results["result"], dict) and "result_sum" in results["result"]:
                    try:
                        result_sum = int(results["result"]["result_sum"])
                        if result_sum == target_sum:
                            matches.append(results)
                    except (TypeError, ValueError) as e:
                        logger.error(f"Error comparing result_sum: {e}, Data: {results}")
                if "subresults" in results:
                    matches.extend(find_results_by_sum(results["subresults"], target_sum))
            return matches
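
        # Traversal sketch: the function matches at any depth, so for
        #   tree = [{"result": {"result_sum": 7}, "subresults":
        #             [{"result": {"result_sum": 7}, "subresults": []}]}]
        # find_results_by_sum(tree, 7) returns both the parent and the child.
        # Matches are references into the original tree, hence the deepcopy
        # before mutation in perform_comprehensive_els_search above.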
        def extract_sums_recursive(data, sums_set):
            if isinstance(data, list):
                for item in data:
                    extract_sums_recursive(item, sums_set)
            elif isinstance(data, dict):
                if "result" in data and isinstance(data["result"], dict) and "result_sum" in data["result"]:
                    try:
                        result_sum = int(data["result"]["result_sum"])
                        sums_set.add(result_sum)
                    except (TypeError, ValueError) as e:
                        logger.error(f"Error extracting result_sum: {e}, Data: {data}")
                if "subresults" in data:
                    extract_sums_recursive(data["subresults"], sums_set)

        def count_sums_recursive(data, sums_counter):
            if isinstance(data, list):
                for item in data:
                    count_sums_recursive(item, sums_counter)
            elif isinstance(data, dict):
                if "result" in data and isinstance(data["result"], dict) and "result_sum" in data["result"]:
                    try:
                        result_sum = int(data["result"]["result_sum"])
                        sums_counter[result_sum] += 1
                    except (TypeError, ValueError) as e:
                        logger.error(f"Error counting result_sum: {e}, Data: {data}")
                if "subresults" in data:
                    count_sums_recursive(data["subresults"], sums_counter)
        def translate_recursive(item):
            if "result" in item and isinstance(item["result"], dict) and "result_text" in item["result"]:
                text = item["result"].get("result_text", "")
                source_lang = item["result"].get("source_language", "auto")
                if text:
                    translated_text = translation_utils.get_translation(text, "en", source_lang)[0]
                    item["result"]["translated_text"] = translated_text
            if "subresults" in item and isinstance(item["subresults"], list):
                for sub in item["subresults"]:
                    translate_recursive(sub)
        perform_search_btn_comp.click(
            perform_comprehensive_els_search,
            inputs=[
                start_date_comp,
                end_date_comp,
                names_input_comp,
                sub_oscillation_level_comp,
                include_torah_chk_comp,
                include_bible_chk_comp,
                include_quran_chk_comp,
                include_hindu_chk_comp,
                include_tripitaka_chk_comp,
                translate_results_chk_comp
            ],
            outputs=[common_gematria_results]
        )
    # --- Event Handlers ---
    search_type.change(
        fn=update_search_components,
        inputs=[search_type],
        outputs=[search_term, gematria_sum_search]
    )
    search_db_btn.click(
        fn=search_cache_database,
        inputs=[search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter, search_mode],
        outputs=cache_search_results
    )

    def update_rounds_choices():
        return gr.update(choices=extract_rounds_combinations())  # gr.update, not gr.Dropdown.update

    app.load(fn=update_rounds_choices, inputs=None, outputs=rounds_filter)
    main_book_filter.change(
        fn=update_rounds_choices,
        inputs=None,  # No input needed here
        outputs=rounds_filter
    )
    # --- Rest of the handlers ---
    def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
        if selected_date is None:
            return ""
        if not use_year and not use_month and not use_day:
            return translate_date_to_words(selected_date, date_language_input)

        year = selected_date.year if use_year else None
        month = selected_date.month if use_month else None
        day = selected_date.day if use_day else None

        if year is not None and month is not None and day is not None:
            date_obj = selected_date
        elif year is not None and month is not None:
            date_obj = f"{year}-{month}"
        elif year is not None:
            date_obj = f"{year}"
        else:  # Return empty string if no date components are selected
            return ""

        date_in_words = date_to_words(date_obj)
        translator = GoogleTranslator(source='auto', target=date_language_input)
        translated_date_words = translator.translate(date_in_words)
        return custom_normalize(translated_date_words)
    def update_journal_sum(gematria_text, date_words_output):
        sum_value = calculate_gematria_sum(gematria_text, date_words_output)
        return sum_value, sum_value, sum_value

    def update_rounds_combination(round_x, round_y):
        return f"{int(round_x)},{int(round_y)}"

    def update_step_half(float_step):
        new_step = math.ceil(float_step / 2)
        return new_step, float_step / 2

    def update_step_double(float_step):
        new_step = math.ceil(float_step * 2)
        return new_step, float_step * 2
    # NOTE: find_closest_phrase depends on fuzzywuzzy, whose import was removed
    # above; its only call site is commented out in perform_search, so it is
    # retained here for reference only.
    def find_closest_phrase(target_phrase, phrases):
        best_match = None
        best_score = 0
        logging.debug(f"Target phrase for similarity search: {target_phrase}")
        for phrase, _, _, _, _, _ in phrases:
            word_length_diff = abs(len(target_phrase.split()) - len(phrase.split()))
            similarity_score = fuzz.ratio(target_phrase, phrase)
            combined_score = similarity_score - word_length_diff
            logging.debug(f"Comparing with phrase: {phrase}")
            logging.debug(
                f"Word Length Difference: {word_length_diff}, Similarity Score: {similarity_score}, Combined Score: {combined_score}")
            if combined_score > best_score:
                best_score = combined_score
                best_match = phrase
        logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")
        return best_match
    def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                       include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text,
                       date_words_output, selected_date):
        els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                                         strip_diacritics_chk, include_torah, include_bible, include_quran,
                                         include_hindu, include_tripitaka)

        most_frequent_phrases = {}
        combined_and_sorted_results = []
        for book_name, book_results in els_results.items():
            if book_results:
                most_frequent_phrases[book_name] = ""
                for result in book_results:
                    try:
                        gematria_sum = calculate_gematria(result['result_text'])
                        max_words = len(result['result_text'].split())
                        matching_phrases = search_gematria_in_db(gematria_sum, max_words)
                        while not matching_phrases and max_words < MAX_PHRASE_LENGTH_LIMIT:
                            max_words += 1
                            matching_phrases = search_gematria_in_db(gematria_sum, max_words)
                        if matching_phrases:
                            most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
                            most_frequent_phrases[book_name] = most_frequent_phrase
                        else:
                            # closest_phrase = find_closest_phrase(result['result_text'],
                            #     search_gematria_in_db(gematria_sum, MAX_PHRASE_LENGTH_LIMIT))  # Removed fuzzywuzzy
                            most_frequent_phrases[book_name] = ""  # closest_phrase or ""
                        result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
                        if 'book' in result:
                            if isinstance(result['book'], int):
                                result['book'] = f"{book_name} {result['book']}."
                        combined_and_sorted_results.append(result)
                    except KeyError as e:
                        logger.debug(f"KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
                        continue

        selected_language_long = tlang
        tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
        if tlang_short is None:
            tlang_short = "en"
            logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")

        phrases_to_translate = []
        phrases_source_langs = []
        results_to_translate = []
        results_source_langs = []
        for result in combined_and_sorted_results:
            phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
            phrases_source_langs.append("he")
            results_to_translate.append(result.get('result_text', ''))
            results_source_langs.append(result.get("source_language", "auto"))

        translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short, phrases_source_langs)
        translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short, results_source_langs)

        for i, result in enumerate(combined_and_sorted_results):
            result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
            result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)

        updated_els_results = add_24h_projection(els_results)
        updated_els_results = add_monthly_projection(updated_els_results, selected_date)
        updated_els_results = add_yearly_projection(updated_els_results, selected_date)

        combined_and_sorted_results = []
        for book_results in updated_els_results.values():
            combined_and_sorted_results.extend(book_results)
        combined_and_sorted_results = sort_results(combined_and_sorted_results)

        df = pd.DataFrame(combined_and_sorted_results)
        df.index = range(1, len(df) + 1)
        df.reset_index(inplace=True)
        df.rename(columns={'index': 'Result Number'}, inplace=True)

        for i, result in enumerate(combined_and_sorted_results):
            result['Result Number'] = i + 1

        search_config = {
            "step": step,
            "rounds_combination": rounds_combination,
            "target_language": tlang,
            "strip_spaces": strip_spaces,
            "strip_in_braces": strip_in_braces,
            "strip_diacritics": strip_diacritics_chk,
            "include_torah": include_torah,
            "include_bible": include_bible,
            "include_quran": include_quran,
            "include_hindu": include_hindu,
            "include_tripitaka": include_tripitaka,
            "gematria_text": gematria_text,
            "date_words": date_words_output
        }
        output_data = {
            "search_configuration": search_config,
            "results": combined_and_sorted_results
        }
        json_data = output_data

        combined_most_frequent = "\n".join(
            f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items())

        return df, combined_most_frequent, json_data
    # --- Event Triggers ---
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year],
                         outputs=[date_words_output])
    date_language_input.change(update_date_words,
                               inputs=[selected_date, date_language_input, use_day, use_month, use_year],
                               outputs=[date_words_output])
    gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output],
                         outputs=[gematria_result, step, float_step])
    date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output],
                             outputs=[gematria_result, step, float_step])
    half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
    double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
    translate_btn.click(
        perform_search,
        inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk,
                include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text,
                date_words_output, selected_date],
        outputs=[markdown_output, most_frequent_phrase_output, json_output]
    )
    app.load(
        update_date_words,
        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
        outputs=[date_words_output]
    )
    use_day.change(
        update_date_words,
        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
        outputs=[date_words_output]
    )
    use_month.change(
        update_date_words,
        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
        outputs=[date_words_output]
    )
    use_year.change(
        update_date_words,
        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
        outputs=[date_words_output]
    )

    def checkbox_behavior(use_day_value, use_month_value):
        # Checking "Use Day" forces month and year on; checking "Use Month" forces year on
        if use_day_value:
            return True, True
        return use_month_value, True

    use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
    use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])


if __name__ == "__main__":
    app.launch(share=False)