"""Gematria / ELS search Gradio application.

Provides several search tabs built on cached ELS (equidistant letter
sequence) computations over multiple scripture corpora, backed by two
SQLite databases: a gematria phrase database and an ELS result cache.
"""

import logging
import os

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

import gradio as gr
import torah
import bible
import quran
import hindu
import tripitaka
from utils import number_to_ordinal_word, custom_normalize, date_to_words, translate_date_to_words
from gematria import calculate_gematria, strip_diacritics
import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar
from datetime import datetime, timedelta
import math
import json
import re
import sqlite3
import difflib  # stdlib replacement for the removed fuzzywuzzy dependency
from collections import defaultdict
from typing import List, Tuple
import calendar
import translation_utils
import hashlib

translation_utils.create_translation_table()

# Create a translator instance *once* globally.
translator = GoogleTranslator(source='auto', target='auto')
LANGUAGES_SUPPORTED = translator.get_supported_languages(as_dict=True)
LANGUAGE_CODE_MAP = LANGUAGES_SUPPORTED  # Use deep_translator's mapping directly

# --- Constants ---
DATABASE_FILE = 'gematria.db'
MAX_PHRASE_LENGTH_LIMIT = 20
ELS_CACHE_DB = "els_cache.db"
DATABASE_TIMEOUT = 60


# --- Database Initialization ---
def initialize_database():
    """Create the main gematria results database, its index, and the
    processed-books bookkeeping table (idempotent).

    NOTE: intentionally keeps a module-level connection open in the global
    ``conn``, matching the original behavior other code may rely on.
    """
    global conn
    conn = sqlite3.connect(DATABASE_FILE)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
    ''')
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
    ''')
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')
    conn.commit()


# --- Initialize Database ---
initialize_database()


# --- ELS Cache Functions ---
def create_els_cache_table():
    """Create the ELS cache table if it does not yet exist.

    BUG FIX: the original file defined this function twice; the first
    version skipped creation whenever the cache file already existed,
    even if the table inside it was missing. This single definition is
    unconditionally idempotent.
    """
    with sqlite3.connect(ELS_CACHE_DB) as cache_conn:
        try:
            cache_conn.execute('''
                CREATE TABLE IF NOT EXISTS els_cache (
                    query_hash TEXT PRIMARY KEY,
                    function_name TEXT,
                    args TEXT,
                    kwargs TEXT,
                    results TEXT
                )
            ''')
        except sqlite3.OperationalError as e:
            logger.error(f"Error creating table: {e}")


def get_query_hash(func, args, kwargs):
    """Return a stable SHA-256 hex digest identifying a cached call."""
    key = (func.__name__, args, kwargs)
    return hashlib.sha256(json.dumps(key).encode()).hexdigest()


def cached_process_json_files(func, *args, **kwargs):
    """Memoize ``func(*args, **kwargs)`` in the ELS cache database.

    The cache key is derived from the function's qualified name plus its
    bound arguments serialized as JSON. On a cache hit the stored JSON
    results are returned; on a miss the function is executed and its
    results stored.
    """
    # Record the fully qualified function name plus all bound arguments.
    params = {
        "function": f"{func.__module__}.{func.__name__}"
    }
    # Add the positional arguments with their names.
    arg_names = func.__code__.co_varnames[:func.__code__.co_argcount]
    for name, value in zip(arg_names, args):
        params[name] = value
    # Add the keyword arguments.
    for name, value in kwargs.items():
        params[name] = value

    # Convert the parameters to a JSON string and derive the cache key.
    params_json = json.dumps(params)
    query_hash = get_query_hash(func, params_json, "")

    # Ensure the table exists before any operations.
    create_els_cache_table()

    try:
        with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as cache_conn:
            cursor = cache_conn.cursor()
            cursor.execute(
                "SELECT results FROM els_cache WHERE query_hash = ?", (query_hash,))
            result = cursor.fetchone()
            if result:
                logger.info(f"Cache hit for query: {query_hash}")
                return json.loads(result[0])
    except sqlite3.Error as e:
        logger.error(f"Database error checking cache: {e}")

    logger.info(f"Cache miss for query: {query_hash}")
    results = func(*args, **kwargs)

    try:
        with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as cache_conn:
            cursor = cache_conn.cursor()
            cursor.execute(
                "INSERT INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
                (query_hash, params["function"], params_json, json.dumps({}), json.dumps(results)))
            cache_conn.commit()
    except sqlite3.Error as e:
        logger.error(f"Database error caching results: {e}")

    return results


# --- Helper Functions (from Network app.py) ---
def flatten_text(text: List) -> str:
    """Recursively join nested lists of strings into one space-separated string."""
    if isinstance(text, list):
        return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
    return text


def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
    """Return all stored phrases with the given gematria sum and at most
    ``max_words`` words.

    Uses its own short-lived connection (the original declared
    ``global conn`` and clobbered the module-level connection).
    """
    with sqlite3.connect(DATABASE_FILE) as db_conn:
        cursor = db_conn.cursor()
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND phrase_length <= ?
        ''', (gematria_sum, max_words))
        return cursor.fetchall()


def get_most_frequent_phrase(results):
    """Return the phrase text occurring most often in DB result rows (or None)."""
    phrase_counts = defaultdict(int)
    for words, book, chapter, verse, phrase_length, word_position in results:
        phrase_counts[words] += 1
    most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None
    return most_frequent_phrase


# --- Functions from BOS app.py ---
def create_language_dropdown(label, default_value='English', show_label=True):
    """Build a Gradio dropdown listing all deep_translator-supported languages."""
    return gr.Dropdown(
        choices=list(LANGUAGE_CODE_MAP.keys()),
        label=label,
        value=default_value,
        show_label=show_label
    )


def calculate_gematria_sum(text, date_words):
    """Return combined gematria of ``text`` + ``date_words``.

    Digits in the combined input are summed as plain integers; the rest is
    stripped of diacritics and valued via :func:`calculate_gematria`.
    Returns None when both inputs are empty.
    """
    if text or date_words:
        combined_input = f"{text} {date_words}"
        logger.info(f"searching for input: {combined_input}")
        numbers = re.findall(r'\d+', combined_input)
        text_without_numbers = re.sub(r'\d+', '', combined_input)
        number_sum = sum(int(number) for number in numbers)
        text_gematria = calculate_gematria(strip_diacritics(text_without_numbers))
        total_sum = text_gematria + number_sum
        return total_sum
    else:
        return None


def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                       include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
    """Run the (cached) ELS search over every selected corpus.

    Returns a dict mapping book name -> result list (empty list for
    corpora that were not requested), or None for a degenerate query
    (step 0 or rounds "0,0").
    """
    if step == 0 or rounds_combination == "0,0":
        return None

    results = {}
    length = 0

    selected_language_long = tlang
    tlang = LANGUAGES_SUPPORTED.get(selected_language_long)
    if tlang is None:
        tlang = "en"
        logger.warning(
            f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")

    if include_torah:
        logger.debug(
            f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
        results["Torah"] = cached_process_json_files(torah.process_json_files, 1, 39, step, rounds_combination,
                                                     length, tlang, strip_spaces, strip_in_braces,
                                                     strip_diacritics_chk)
    else:
        results["Torah"] = []

    if include_bible:
        results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination,
                                                     length, tlang, strip_spaces, strip_in_braces,
                                                     strip_diacritics_chk)
    else:
        results["Bible"] = []

    if include_quran:
        results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination,
                                                     length, tlang, strip_spaces, strip_in_braces,
                                                     strip_diacritics_chk)
    else:
        results["Quran"] = []

    if include_hindu:
        # NOTE: strip_spaces is deliberately forced to False for the Rig Veda.
        results["Rig Veda"] = cached_process_json_files(
            hindu.process_json_files, 1, 10, step, rounds_combination, length, tlang, False,
            strip_in_braces, strip_diacritics_chk)
    else:
        results["Rig Veda"] = []

    if include_tripitaka:
        results["Tripitaka"] = cached_process_json_files(
            tripitaka.process_json_files, 1, 52, step, rounds_combination, length, tlang, strip_spaces,
            strip_in_braces, strip_diacritics_chk)
    else:
        results["Tripitaka"] = []

    return results


def add_24h_projection(results_dict):
    """Annotate each result with an evenly spaced HH:MM-HH:MM slice of a day."""
    for book_name, results in results_dict.items():
        num_results = len(results)
        if num_results > 0:
            time_interval = timedelta(minutes=24 * 60 / num_results)
            current_time = datetime.min.time()
            for i in range(num_results):
                next_time = (datetime.combine(datetime.min, current_time) + time_interval).time()
                time_range_str = f"{current_time.strftime('%H:%M')}-{next_time.strftime('%H:%M')}"
                results[i]['24h Projection'] = time_range_str
                current_time = next_time
    return results_dict


def add_monthly_projection(results_dict, selected_date):
    """Annotate each result with an evenly spaced date range within the
    selected month. No-op when no date was picked."""
    if selected_date is None:
        return results_dict

    for book_name, results in results_dict.items():
        num_results = len(results)
        if num_results > 0:
            days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
            total_seconds = (days_in_month - 1) * 24 * 3600
            seconds_interval = total_seconds / num_results
            start_datetime = datetime(selected_date.year, selected_date.month, 1)
            current_datetime = start_datetime
            for i in range(num_results):
                next_datetime = current_datetime + timedelta(seconds=seconds_interval)
                current_date = current_datetime.date()
                next_date = next_datetime.date()
                # BUG FIX: '%h' is a non-portable glibc alias; use '%b'
                # (consistent with add_yearly_projection).
                date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
                results[i]['Monthly Projection'] = date_range_str
                current_datetime = next_datetime
    return results_dict


def add_yearly_projection(results_dict, selected_date):
    """Annotate each result with an evenly spaced date range within the
    selected year. No-op when no date was picked."""
    if selected_date is None:
        return results_dict

    for book_name, results in results_dict.items():
        num_results = len(results)
        if num_results > 0:
            days_in_year = 366 if calendar.isleap(selected_date.year) else 365
            total_seconds = (days_in_year - 1) * 24 * 3600
            seconds_interval = total_seconds / num_results
            start_datetime = datetime(selected_date.year, 1, 1)
            current_datetime = start_datetime
            for i in range(num_results):
                next_datetime = current_datetime + timedelta(seconds=seconds_interval)
                current_date = current_datetime.date()
                next_date = next_datetime.date()
                date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
                results[i]['Yearly Projection'] = date_range_str
                current_datetime = next_datetime
    return results_dict


def sort_results(results):
    """Sort results by their '24h Projection' time range (start, then end)."""

    def parse_time(time_str):
        try:
            hours, minutes = map(int, time_str.split(':'))
            return hours * 60 + minutes
        except ValueError:
            return 24 * 60  # sort unparseable entries last

    # BUG FIX: the original default '23:59' had no '-', so .split('-')[1]
    # raised IndexError for results missing the projection key.
    return sorted(results, key=lambda x: (
        parse_time(x.get('24h Projection', '23:59-23:59').split('-')[0]),
        parse_time(x.get('24h Projection', '23:59-23:59').split('-')[1])
    ))


def extract_rounds_combinations():
    """Extracts unique rounds combinations from the database."""
    combinations = set()
    try:
        with sqlite3.connect(ELS_CACHE_DB) as cache_conn:
            cursor = cache_conn.cursor()
            cursor.execute("SELECT args FROM els_cache")
            all_args = cursor.fetchall()
            for args_tuple in all_args:
                args_str = args_tuple[0]
                try:
                    args_json = json.loads(args_str)
                    if 'rounds' in args_json:
                        combinations.add(args_json['rounds'])
                except json.JSONDecodeError:
                    logger.error(f"Could not decode JSON for args: {args_str}")
    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
    logger.info(f"Found unique rounds combinations: {combinations}")
    return ["All"] + sorted(list(combinations))


def update_rounds_dropdown():
    """Return refreshed choices for the rounds dropdown."""
    new_choices = extract_rounds_combinations()
    return new_choices


def perform_gematria_calculation_for_date_range(start_date, end_date):
    """Compute date-words and date gematria for every day in the range (inclusive)."""
    logger.debug(f"Calculating date gematria for range: {start_date} - {end_date}")
    results = {}
    delta = timedelta(days=1)
    current_date = start_date
    while current_date <= end_date:
        date_string = current_date.strftime("%Y-%m-%d")
        date_words = date_to_words(date_string)
        # Adjusted to match calculate_gematria_sum's signature.
        date_gematria = calculate_gematria_sum(date_words, "")
        results[date_string] = {
            "date_words": date_words,
            "date_gematria": date_gematria,
        }
        current_date += delta
    logger.debug(f"Finished calculating date gematria.")
    return results


def find_matching_dates(date_gematrias, names, search_journal_sum):
    """Return, per name, the dates whose gematria completes the journal sum."""
    logger.debug(f"Searching for matches with journal sum: {search_journal_sum}")
    matching_dates = {}
    for name in names:
        # Adjusted to match calculate_gematria_sum's signature.
        name_gematria = calculate_gematria_sum(name, "")
        target_date_gematria = search_journal_sum - name_gematria if name_gematria is not None else None
        logger.debug(f"Name: {name}, Gematria: {name_gematria}, Target Date Gematria: {target_date_gematria}")
        if target_date_gematria is not None:
            for date_str, date_data in date_gematrias.items():
                if date_data["date_gematria"] == target_date_gematria:
                    if name not in matching_dates:
                        matching_dates[name] = []
                    matching_dates[name].append(date_str)
        logger.debug(f"Matches for {name}: {matching_dates.get(name, [])}")
    return matching_dates


def find_shared_journal_sums(date_gematrias, names):
    """Finds shared journal sums and formats output with names and dates together."""
    logger.debug("Calculating shared journal sums...")
    shared_sums = {}
    name_gematrias = {name: calculate_gematria_sum(name, "") for name in names}
    for date_str, date_data in date_gematrias.items():
        date_gematria = date_data["date_gematria"]
        for name, name_gematria in name_gematrias.items():
            journal_sum = date_gematria + name_gematria
            # Convert the key (journal_sum) to a string for JSON output.
            journal_sum_str = str(journal_sum)
            if journal_sum_str not in shared_sums:
                shared_sums[journal_sum_str] = {}
            if name not in shared_sums[journal_sum_str]:
                shared_sums[journal_sum_str][name] = []
            shared_sums[journal_sum_str][name].append(date_str)

    # Filter out sums not shared by at least two names and format output.
    result = {}
    for journal_sum_str, data in shared_sums.items():
        if len(data) >= 2:
            result[journal_sum_str] = {}
            for name, dates in data.items():
                result[journal_sum_str][name] = dates
    logger.debug(f"Shared Journal Sums: {result}")
    return result


def calculate_and_find_dates(start_date, end_date, names_input, search_journal_sum, find_shared=False):
    """Dispatch either the matching-dates search or the shared-sums search.

    Returns a (matching_dates, shared_sums) pair with the unused half None.
    """
    names = [n.strip() for n in names_input.split("\n") if n.strip()]
    date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)
    if find_shared:
        shared_sums = find_shared_journal_sums(date_gematrias, names)
        return None, shared_sums
    else:
        matching_dates = find_matching_dates(date_gematrias, names, int(search_journal_sum))
        return matching_dates, None


# --- Main Gradio App ---
with gr.Blocks() as app:
    with gr.Tab("ELS Search"):
        with gr.Column():
            with gr.Row():
                tlang = create_language_dropdown("Target Language for Result Translation",
                                                 default_value='english')
                selected_date = Calendar(type="datetime", label="Date to investigate (optional)",
                                         info="Pick a date from the calendar")
                use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
                use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
                use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
                date_language_input = create_language_dropdown(
                    "Language of the person/topic (optional) (Date Word Language)", default_value='english')
            with gr.Row():
                gematria_text = gr.Textbox(label="Name and/or Topic (required)",
                                           value="Hans Albert Einstein Mileva Marity-Einstein")
                date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
                gematria_result = gr.Number(label="Journal Sum")

            with gr.Row():
                step = gr.Number(label="Jump Width (Steps) for ELS")
                float_step = gr.Number(visible=False, value=1)
                half_step_btn = gr.Button("Steps / 2")
                double_step_btn = gr.Button("Steps * 2")
                with gr.Column():
                    round_x = gr.Number(label="Round (1)", value=1)
                    round_y = gr.Number(label="Round (2)", value=-1)
                    rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")

            with gr.Row():
                include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
                include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
                include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
                include_hindu_chk = gr.Checkbox(label="Include Rigveda", value=False)
                include_tripitaka_chk = gr.Checkbox(label="Include Tripitaka", value=False)
                strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
                strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
                strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
                translate_btn = gr.Button("Search with ELS")

            # --- Output Components ---
            markdown_output = gr.Dataframe(label="ELS Results")
            most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
            json_output = gr.JSON(label="JSON Output")

    with gr.Tab("Cache Database Search"):
        with gr.Column():
            with gr.Row():
                main_book_filter = gr.Dropdown(label="Filter by Main Book",
                                               choices=["All", "Torah", "Bible", "Quran", "Rig Veda",
                                                        "Tripitaka"],
                                               value="Torah")
                # No choices here, only the label and the initial value;
                # choices are filled in dynamically from the cache DB.
                rounds_filter = gr.Dropdown(label="Filter by Rounds", allow_custom_value=True, value="1,-1")
            with gr.Row():
                search_type = gr.Radio(label="Search by",
                                       choices=["Text in result_text", "Gematria Sum in results"],
                                       value="Text in result_text")
            with gr.Row():
                search_mode = gr.Radio(label="Search Mode", choices=["Exact Search", "Contains Word"],
                                       value="Contains Word")
            with gr.Row():
                search_term = gr.Textbox(label="Search Term", visible=True)
                gematria_sum_search = gr.Number(label="Gematria Sum", visible=False)
            with gr.Row():
                search_db_btn = gr.Button("Search Cache Database")
            with gr.Row():
                cache_search_results = gr.JSON(label="Cache Search Results")

        def update_search_components(search_type):
            """Toggle visibility of the text vs. gematria-sum inputs.

            NOTE: the original defined this twice (once with the removed
            ``gr.Textbox.update`` API); this single definition uses the
            supported ``gr.update`` helper.
            """
            if search_type == "Text in result_text":
                return gr.update(visible=True), gr.update(visible=False)
            else:
                return gr.update(visible=False), gr.update(visible=True)

        def search_cache_database(search_type, search_term, gematria_sum_search, main_book_filter,
                                  rounds_filter, search_mode):
            """Searches the cache database based on the selected filters and search term."""
            results = []
            search_term = strip_diacritics(search_term)

            # Book name -> function_name stored in the cache (factored out of
            # the original duplicated if/elif ladders).
            book_function_map = {
                "Torah": "torah.process_json_files",
                "Bible": "bible.process_json_files",
                "Quran": "quran.process_json_files",
                "Rig Veda": "hindu.process_json_files",
                "Tripitaka": "tripitaka.process_json_files",
            }

            if main_book_filter == "All" and rounds_filter == "All" and not search_term and not gematria_sum_search:
                return results

            try:
                with sqlite3.connect(ELS_CACHE_DB) as cache_conn:
                    cursor = cache_conn.cursor()

                    if search_type == "Text in result_text":
                        # Optimization: If only main_book_filter is selected, don't perform a full search
                        if main_book_filter != "All" and rounds_filter == "All" and not search_term:
                            return results

                        cursor.execute("SELECT * FROM els_cache")
                        all_results = cursor.fetchall()
                        columns = [desc[0] for desc in cursor.description]

                        for row in all_results:
                            row_dict = dict(zip(columns, row))
                            args_dict = json.loads(row_dict['args'])
                            function_name = row_dict['function_name']

                            # Function name filtering
                            if main_book_filter != "All" and book_function_map.get(main_book_filter) != function_name:
                                continue

                            # Rounds filtering
                            if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
                                continue

                            try:
                                results_json = json.loads(row_dict['results'])
                                for result_entry in results_json:
                                    if 'result_text' in result_entry:
                                        if search_mode == "Exact Search" and search_term == result_entry['result_text']:
                                            entry = {
                                                'function_name': function_name,
                                                'step': args_dict.get('step'),
                                                'rounds': args_dict.get('rounds'),
                                                'result': result_entry
                                            }
                                            results.append(entry)
                                        elif search_mode == "Contains Word" and search_term in result_entry['result_text']:
                                            entry = {
                                                'function_name': function_name,
                                                'step': args_dict.get('step'),
                                                'rounds': args_dict.get('rounds'),
                                                'result': result_entry
                                            }
                                            results.append(entry)
                            except (json.JSONDecodeError, TypeError) as e:
                                logger.error(f"Error processing row: {e}")
                                continue

                    elif search_type == "Gematria Sum in results":
                        # Optimization: If only main_book_filter is selected, don't perform a full search
                        if main_book_filter != "All" and rounds_filter == "All" and not gematria_sum_search:
                            return results
                        if not isinstance(gematria_sum_search, (int, float)):
                            return results

                        cursor.execute("SELECT * FROM els_cache")
                        all_results = cursor.fetchall()
                        columns = [desc[0] for desc in cursor.description]

                        for row in all_results:
                            row_dict = dict(zip(columns, row))
                            args_dict = json.loads(row_dict['args'])
                            function_name = row_dict['function_name']

                            # Function name filtering
                            if main_book_filter != "All" and book_function_map.get(main_book_filter) != function_name:
                                continue

                            # Rounds filtering
                            if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
                                continue

                            try:
                                results_json = json.loads(row_dict['results'])
                                for result_entry in results_json:
                                    if 'result_sum' in result_entry and result_entry['result_sum'] == gematria_sum_search:
                                        entry = {
                                            'function_name': function_name,
                                            'step': args_dict.get('step'),
                                            'rounds': args_dict.get('rounds'),
                                            'result': result_entry
                                        }
                                        results.append(entry)
                            except (json.JSONDecodeError, TypeError) as e:
                                logger.error(f"Error processing row: {e}")
                                continue

                    # Sort results by gematria sum
                    results.sort(
                        key=lambda x: x['result']['result_sum'] if 'result' in x and 'result_sum' in x['result'] else 0)
                    return results

            except sqlite3.Error as e:
                logger.error(f"Database error: {e}")
                return []

    with gr.Tab("Date Range Journal Sum Search"):
        with gr.Row():
            start_date_jr = Calendar(type="datetime", label="Start Date")
            end_date_jr = Calendar(type="datetime", label="End Date")
        with gr.Row():
            names_input_jr = gr.Textbox(label="Names (one per line)", lines=5)
            search_sum_jr = gr.Number(label="Search Journal Sum", precision=0)
        with gr.Row():
            calculate_btn_jr = gr.Button("Search Journal Sum")
            shared_sums_btn_jr = gr.Button("Find Shared Journal Sums")
        matching_dates_output_jr = gr.JSON(label="Matching Dates")
        shared_sums_output_jr = gr.JSON(label="Shared Journal Sums")

        calculate_btn_jr.click(
            lambda start_date, end_date, names_input, search_sum: calculate_and_find_dates(
                start_date, end_date, names_input, search_sum, find_shared=False),
            inputs=[start_date_jr, end_date_jr, names_input_jr, search_sum_jr],
            outputs=[matching_dates_output_jr, shared_sums_output_jr]
        )

        shared_sums_btn_jr.click(
            lambda start_date, end_date, names_input: calculate_and_find_dates(
                start_date, end_date, names_input, 0, find_shared=True),
            inputs=[start_date_jr, end_date_jr, names_input_jr],
            outputs=[matching_dates_output_jr, shared_sums_output_jr]
        )

    with gr.Tab("Date Range ELS Search"):
        with gr.Row():
            start_date_els = Calendar(type="datetime", label="Start Date")
            end_date_els = Calendar(type="datetime", label="End Date")
        with gr.Row():
            names_input_els = gr.Textbox(label="Names (one per line)", lines=5)
        with gr.Row():
            search_type_els = gr.Radio(
                label="Search by",
                choices=["Text in result_text", "Gematria Sum in results"],
                value="Text in result_text"
            )
        with gr.Row():
            search_mode_els = gr.Radio(
                label="Search Mode",
                choices=["Exact Search", "Contains Word"],
                value="Contains Word"
            )
        with gr.Row():
            search_term_els = gr.Textbox(label="Search Term", visible=True)
            gematria_sum_search_els = gr.Number(label="Gematria Sum", visible=False)
        with gr.Row():
            include_torah_chk_els = gr.Checkbox(label="Include Torah", value=True)
            include_bible_chk_els = gr.Checkbox(label="Include Bible", value=True)
            include_quran_chk_els = gr.Checkbox(label="Include Quran", value=True)
            include_hindu_chk_els = gr.Checkbox(label="Include Rigveda", value=False)
            include_tripitaka_chk_els = gr.Checkbox(label="Include Tripitaka", value=False)
        with gr.Row():
            perform_search_btn_els = gr.Button("Perform Search")
        filtered_results_output_els = gr.JSON(label="Filtered Results")

        # Functions to update visibility of the search input fields.
        def update_search_components_els(search_type):
            if search_type == "Text in result_text":
                return gr.Textbox(visible=True), gr.Number(visible=False)
            else:
                return gr.Textbox(visible=False), gr.Number(visible=True)

        search_type_els.change(
            fn=update_search_components_els,
            inputs=[search_type_els],
            outputs=[search_term_els, gematria_sum_search_els]
        )

        # Main function for the fourth tab.
        def perform_date_range_els_search(start_date, end_date, names_input, search_type, search_term,
                                          gematria_sum_search, search_mode, include_torah, include_bible,
                                          include_quran, include_hindu, include_tripitaka):
            """For every (date, name) pair in the range, run an ELS search with
            the combined gematria sum as step and filter the hits."""
            names = [n.strip() for n in names_input.split("\n") if n.strip()]
            date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)

            # Store intermediate results with date, name and gematria sum.
            intermediate_results = []
            for date_str, date_data in date_gematrias.items():
                for name in names:
                    name_gematria = calculate_gematria_sum(name, "")
                    combined_gematria_sum = date_data["date_gematria"] + name_gematria
                    intermediate_results.append(
                        {"date": date_str, "name": name, "gematria_sum": combined_gematria_sum}
                    )

            # Sort results by date.
            intermediate_results.sort(key=lambda x: x["date"])

            all_results = []
            for intermediate_result in intermediate_results:
                date_str = intermediate_result["date"]
                name = intermediate_result["name"]
                gematria_sum = intermediate_result["gematria_sum"]

                # Perform the ELS search for each gematria sum.
                els_results = perform_els_search(
                    step=gematria_sum,
                    rounds_combination="1,-1",
                    tlang="english",
                    strip_spaces=True,
                    strip_in_braces=True,
                    strip_diacritics_chk=True,
                    include_torah=include_torah,
                    include_bible=include_bible,
                    include_quran=include_quran,
                    include_hindu=include_hindu,
                    include_tripitaka=include_tripitaka
                )

                # Process the results for each book and gematria sum.
                for book_name, book_results in els_results.items():
                    if book_results:
                        for result in book_results:
                            try:
                                result_text = result['result_text']
                                result_sum = result['result_sum']

                                # Filter the results.
                                if search_type == "Text in result_text":
                                    if search_mode == "Exact Search" and search_term == result_text:
                                        all_results.append(
                                            {
                                                "date": date_str,
                                                "name": name,
                                                "gematria_sum": gematria_sum,
                                                "book": book_name,
                                                "result": result
                                            }
                                        )
                                    elif search_mode == "Contains Word" and search_term in result_text:
                                        all_results.append(
                                            {
                                                "date": date_str,
                                                "name": name,
                                                "gematria_sum": gematria_sum,
                                                "book": book_name,
                                                "result": result
                                            }
                                        )
                                elif search_type == "Gematria Sum in results":
                                    if result_sum == gematria_sum_search:
                                        all_results.append(
                                            {
                                                "date": date_str,
                                                "name": name,
                                                "gematria_sum": gematria_sum,
                                                "book": book_name,
                                                "result": result
                                            }
                                        )
                            except KeyError as e:
                                logger.error(f"KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
                                continue

            return all_results

        perform_search_btn_els.click(
            perform_date_range_els_search,
            inputs=[start_date_els, end_date_els, names_input_els, search_type_els, search_term_els,
                    gematria_sum_search_els, search_mode_els, include_torah_chk_els, include_bible_chk_els,
                    include_quran_chk_els, include_hindu_chk_els, include_tripitaka_chk_els],
            outputs=[filtered_results_output_els]
        )

    # --- Event Handlers ---
    search_type.change(
        fn=update_search_components,
        inputs=[search_type],
        outputs=[search_term, gematria_sum_search]
    )

    search_db_btn.click(
        fn=search_cache_database,
        inputs=[search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter, search_mode],
        outputs=cache_search_results
    )

    def update_rounds_choices():
        # gr.update, not gr.Dropdown.update (removed in Gradio 4).
        return gr.update(choices=extract_rounds_combinations())

    app.load(fn=update_rounds_choices, inputs=None, outputs=rounds_filter)

    main_book_filter.change(
        fn=update_rounds_choices,
        inputs=None,  # No input needed here
        outputs=rounds_filter
    )

    # rest of the handlers
    def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
        """Build the translated date-in-words string from the selected
        date components (day/month/year toggles)."""
        if selected_date is None:
            return ""

        if not use_year and not use_month and not use_day:
            return translate_date_to_words(selected_date, date_language_input)

        year = selected_date.year if use_year else None
        month = selected_date.month if use_month else None
        day = selected_date.day if use_day else None

        if year is not None and month is not None and day is not None:
            date_obj = selected_date
        elif year is not None and month is not None:
            date_obj = str(f"{year}-{month}")
        elif year is not None:
            date_obj = str(f"{year}")
        else:
            # Return empty string if no date components are selected.
            return ""

        date_in_words = date_to_words(date_obj)

        # Local translator (renamed so it does not shadow the module-level one).
        date_translator = GoogleTranslator(source='auto', target=date_language_input)
        translated_date_words = date_translator.translate(date_in_words)
        return custom_normalize(translated_date_words)

    def update_journal_sum(gematria_text, date_words_output):
        """Recompute the journal sum; feed it to the result, step and float-step fields."""
        sum_value = calculate_gematria_sum(gematria_text, date_words_output)
        return sum_value, sum_value, sum_value

    def update_rounds_combination(round_x, round_y):
        """Join the two round numbers into the 'x,y' combination string."""
        return f"{int(round_x)},{int(round_y)}"

    def update_step_half(float_step):
        """Halve the (float) step; the integer step is the ceiling of the half."""
        new_step = math.ceil(float_step / 2)
        return new_step, float_step / 2

    def update_step_double(float_step):
        """Double the (float) step; the integer step is the ceiling of the double."""
        new_step = math.ceil(float_step * 2)
        return new_step, float_step * 2

    def find_closest_phrase(target_phrase, phrases):
        """Return the DB phrase most similar to ``target_phrase``.

        BUG FIX: the original called fuzz.ratio although the fuzzywuzzy
        import had been removed (guaranteed NameError). difflib's
        SequenceMatcher ratio scaled to 0-100 is the stdlib equivalent.
        """
        best_match = None
        best_score = 0

        logging.debug(f"Target phrase for similarity search: {target_phrase}")

        for phrase, _, _, _, _, _ in phrases:
            word_length_diff = abs(len(target_phrase.split()) - len(phrase.split()))
            similarity_score = int(round(
                difflib.SequenceMatcher(None, target_phrase, phrase).ratio() * 100))
            combined_score = similarity_score - word_length_diff

            logging.debug(f"Comparing with phrase: {phrase}")
            logging.debug(
                f"Word Length Difference: {word_length_diff}, Similarity Score: {similarity_score}, Combined Score: {combined_score}")

            if combined_score > best_score:
                best_score = combined_score
                best_match = phrase

        logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")
        return best_match

    def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                       include_torah, include_bible, include_quran, include_hindu, include_tripitaka,
                       gematria_text, date_words_output, selected_date):
        """Main ELS-search handler: run the search, enrich results with the
        most frequent matching DB phrase, batch-translate, project onto
        day/month/year, and return (DataFrame, summary text, JSON)."""
        els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                                         strip_diacritics_chk, include_torah, include_bible, include_quran,
                                         include_hindu, include_tripitaka)

        most_frequent_phrases = {}
        combined_and_sorted_results = []

        for book_name, book_results in els_results.items():
            if book_results:
                most_frequent_phrases[book_name] = ""

                for result in book_results:
                    try:
                        gematria_sum = calculate_gematria(result['result_text'])
                        max_words = len(result['result_text'].split())
                        matching_phrases = search_gematria_in_db(gematria_sum, max_words)

                        # Widen the word limit until something matches
                        # (consistent with the module-level constant).
                        max_words_limit = MAX_PHRASE_LENGTH_LIMIT
                        while not matching_phrases and max_words < max_words_limit:
                            max_words += 1
                            matching_phrases = search_gematria_in_db(gematria_sum, max_words)

                        if matching_phrases:
                            most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
                            most_frequent_phrases[book_name] = most_frequent_phrase
                        else:
                            # closest_phrase = find_closest_phrase(result['result_text'],
                            #                                      search_gematria_in_db(gematria_sum, max_words_limit))
                            most_frequent_phrases[book_name] = ""  # closest_phrase or ""

                        result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
                        if 'book' in result:
                            if isinstance(result['book'], int):
                                result['book'] = f"{book_name} {result['book']}."
                        combined_and_sorted_results.append(result)

                    except KeyError as e:
                        print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
                        continue

        selected_language_long = tlang
        tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
        if tlang_short is None:
            tlang_short = "en"
            logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")

        phrases_to_translate = []
        phrases_source_langs = []
        results_to_translate = []
        results_source_langs = []

        for result in combined_and_sorted_results:
            phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
            phrases_source_langs.append("he")
            results_to_translate.append(result.get('result_text', ''))
            results_source_langs.append(result.get("source_language", "auto"))

        translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short,
                                                               phrases_source_langs)
        translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short,
                                                                    results_source_langs)

        for i, result in enumerate(combined_and_sorted_results):
            result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
            result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)

        updated_els_results = add_24h_projection(els_results)
        updated_els_results = add_monthly_projection(updated_els_results, selected_date)
        updated_els_results = add_yearly_projection(updated_els_results, selected_date)

        combined_and_sorted_results = []
        for book_results in updated_els_results.values():
            combined_and_sorted_results.extend(book_results)
        combined_and_sorted_results = sort_results(combined_and_sorted_results)

        df = pd.DataFrame(combined_and_sorted_results)
        df.index = range(1, len(df) + 1)
        df.reset_index(inplace=True)
        df.rename(columns={'index': 'Result Number'}, inplace=True)

        for i, result in enumerate(combined_and_sorted_results):
            result['Result Number'] = i + 1

        search_config = {
            "step": step,
            "rounds_combination": rounds_combination,
            "target_language": tlang,
            "strip_spaces": strip_spaces,
            "strip_in_braces": strip_in_braces,
            "strip_diacritics": strip_diacritics_chk,
            "include_torah": include_torah,
            "include_bible": include_bible,
            "include_quran": include_quran,
            "include_hindu": include_hindu,
            "include_tripitaka": include_tripitaka,
            "gematria_text": gematria_text,
            "date_words": date_words_output
        }

        output_data = {
            "search_configuration": search_config,
            "results": combined_and_sorted_results
        }

        json_data = output_data

        combined_most_frequent = "\n".join(
            f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items())

        return df, combined_most_frequent, json_data

    # --- Event Triggers ---
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    selected_date.change(update_date_words,
                         inputs=[selected_date, date_language_input, use_day, use_month, use_year],
                         outputs=[date_words_output])
    date_language_input.change(update_date_words,
                               inputs=[selected_date, date_language_input, use_day, use_month, use_year],
                               outputs=[date_words_output])
    gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output],
                         outputs=[gematria_result, step, float_step])
    date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output],
                             outputs=[gematria_result, step, float_step])
    half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
    double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
    translate_btn.click(
        perform_search,
        inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                include_torah_chk, include_bible_chk, include_quran_chk, include_hindu_chk,
                include_tripitaka_chk, gematria_text, date_words_output, selected_date],
        outputs=[markdown_output, most_frequent_phrase_output, json_output]
    )
    app.load(
        update_date_words,
        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
        outputs=[date_words_output]
    )
    use_day.change(
        update_date_words,
        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
        outputs=[date_words_output]
    )
    use_month.change(
        update_date_words,
        inputs=[selected_date, date_language_input, use_day, use_month, use_year],
        outputs=[date_words_output]
    )
use_year.change(
    update_date_words,
    inputs=[selected_date, date_language_input, use_day, use_month, use_year],
    outputs=[date_words_output]
)


def checkbox_behavior(use_day_value, use_month_value):
    """Keep the date-granularity checkboxes consistent.

    Selecting "day" forces "month" on; "year" is always forced on. Returns the
    new (use_month, use_year) values for the wired outputs below.
    """
    return (True, True) if use_day_value else (use_month_value, True)


use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])

if __name__ == "__main__":
    app.launch(share=False)