# els_journal / app.py — ELS (equidistant letter sequence) journal search app.
# (Hugging Face Spaces page header removed; last commit: "fix tab 4", f35e60a)
import logging
import os
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
import gradio as gr
import torah
import bible
import quran
import hindu
import tripitaka
from utils import number_to_ordinal_word, custom_normalize, date_to_words, translate_date_to_words
from gematria import calculate_gematria, strip_diacritics
import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar
from datetime import datetime, timedelta
import math
import json
import re
import sqlite3
from collections import defaultdict
from typing import List, Tuple
# import rich # Removed rich
# from fuzzywuzzy import fuzz # Removed fuzzywuzzy
import calendar
import translation_utils
import hashlib
translation_utils.create_translation_table()
# Create a translator instance *once* globally
translator = GoogleTranslator(source='auto', target='auto')
LANGUAGES_SUPPORTED = translator.get_supported_languages(as_dict=True)
LANGUAGE_CODE_MAP = LANGUAGES_SUPPORTED # Use deep_translator's mapping directly
# --- Constants ---
DATABASE_FILE = 'gematria.db'
MAX_PHRASE_LENGTH_LIMIT = 20
ELS_CACHE_DB = "els_cache.db"
DATABASE_TIMEOUT = 60
# --- ELS Cache Functions ---
def create_els_cache_table():
if not os.path.exists(ELS_CACHE_DB):
with sqlite3.connect(ELS_CACHE_DB) as conn:
conn.execute('''
CREATE TABLE IF NOT EXISTS els_cache (
query_hash TEXT PRIMARY KEY,
function_name TEXT,
args TEXT,
kwargs TEXT,
results TEXT
)
''')
# --- Database Initialization ---
def initialize_database():
global conn
conn = sqlite3.connect(DATABASE_FILE)
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS results (
gematria_sum INTEGER,
words TEXT,
translation TEXT,
book TEXT,
chapter INTEGER,
verse INTEGER,
phrase_length INTEGER,
word_position TEXT,
PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
)
''')
cursor.execute('''
CREATE INDEX IF NOT EXISTS idx_results_gematria
ON results (gematria_sum)
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS processed_books (
book TEXT PRIMARY KEY,
max_phrase_length INTEGER
)
''')
conn.commit()
# --- Initialize Database ---
initialize_database()
# --- ELS Cache Functions ---
def create_els_cache_table():
with sqlite3.connect(ELS_CACHE_DB) as conn:
try:
conn.execute('''
CREATE TABLE IF NOT EXISTS els_cache (
query_hash TEXT PRIMARY KEY,
function_name TEXT,
args TEXT,
kwargs TEXT,
results TEXT
)
''')
except sqlite3.OperationalError as e:
logger.error(f"Error creating table: {e}")
def get_query_hash(func, args, kwargs):
key = (func.__name__, args, kwargs)
return hashlib.sha256(json.dumps(key).encode()).hexdigest()
def cached_process_json_files(func, *args, **kwargs):
# Create a dictionary to store the parameters
params = {
"function": f"{func.__module__}.{func.__name__}"
}
# Add the positional arguments with their names
arg_names = func.__code__.co_varnames[:func.__code__.co_argcount]
for name, value in zip(arg_names, args):
params[name] = value
# Add the keyword arguments
for name, value in kwargs.items():
params[name] = value
# Convert the parameters to a JSON string
params_json = json.dumps(params)
# Use the parameters JSON string to generate the query hash
query_hash = get_query_hash(func, params_json, "")
# Ensure the table exists before any operations
create_els_cache_table()
try:
with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT results FROM els_cache WHERE query_hash = ?", (query_hash,))
result = cursor.fetchone()
if result:
logger.info(f"Cache hit for query: {query_hash}")
return json.loads(result[0])
except sqlite3.Error as e:
logger.error(f"Database error checking cache: {e}")
logger.info(f"Cache miss for query: {query_hash}")
results = func(*args, **kwargs)
try:
with sqlite3.connect(ELS_CACHE_DB, timeout=DATABASE_TIMEOUT) as conn:
cursor = conn.cursor()
cursor.execute(
"INSERT INTO els_cache (query_hash, function_name, args, kwargs, results) VALUES (?, ?, ?, ?, ?)",
(query_hash, params["function"], params_json, json.dumps({}), json.dumps(results)))
conn.commit()
except sqlite3.Error as e:
logger.error(f"Database error caching results: {e}")
return results
# --- Helper Functions (from Network app.py) ---
def flatten_text(text: List) -> str:
if isinstance(text, list):
return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
return text
def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
global conn
with sqlite3.connect(DATABASE_FILE) as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT words, book, chapter, verse, phrase_length, word_position
FROM results
WHERE gematria_sum = ? AND phrase_length <= ?
''', (gematria_sum, max_words))
results = cursor.fetchall()
return results
def get_most_frequent_phrase(results):
phrase_counts = defaultdict(int)
for words, book, chapter, verse, phrase_length, word_position in results:
phrase_counts[words] += 1
most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None
return most_frequent_phrase
# --- Functions from BOS app.py ---
def create_language_dropdown(label, default_value='English', show_label=True):
return gr.Dropdown(
choices=list(LANGUAGE_CODE_MAP.keys()),
label=label,
value=default_value,
show_label=show_label
)
def calculate_gematria_sum(text, date_words):
if text or date_words:
combined_input = f"{text} {date_words}"
logger.info(f"searching for input: {combined_input}")
numbers = re.findall(r'\d+', combined_input)
text_without_numbers = re.sub(r'\d+', '', combined_input)
number_sum = sum(int(number) for number in numbers)
text_gematria = calculate_gematria(strip_diacritics(text_without_numbers))
total_sum = text_gematria + number_sum
return total_sum
else:
return None
def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
if step == 0 or rounds_combination == "0,0":
return None
results = {}
length = 0
selected_language_long = tlang
tlang = LANGUAGES_SUPPORTED.get(selected_language_long)
if tlang is None:
tlang = "en"
logger.warning(
f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
if include_torah:
logger.debug(
f"Arguments for Torah: {(1, 39, step, rounds_combination, length, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)}")
results["Torah"] = cached_process_json_files(torah.process_json_files, 1, 39, step, rounds_combination, length,
tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
else:
results["Torah"] = []
if include_bible:
results["Bible"] = cached_process_json_files(bible.process_json_files, 40, 66, step, rounds_combination,
length,
tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
else:
results["Bible"] = []
if include_quran:
results["Quran"] = cached_process_json_files(quran.process_json_files, 1, 114, step, rounds_combination,
length,
tlang, strip_spaces, strip_in_braces, strip_diacritics_chk)
else:
results["Quran"] = []
if include_hindu:
results["Rig Veda"] = cached_process_json_files(
hindu.process_json_files, 1, 10, step, rounds_combination, length, tlang, False, strip_in_braces,
strip_diacritics_chk)
else:
results["Rig Veda"] = []
if include_tripitaka:
results["Tripitaka"] = cached_process_json_files(
tripitaka.process_json_files, 1, 52, step, rounds_combination, length, tlang, strip_spaces,
strip_in_braces, strip_diacritics_chk)
else:
results["Tripitaka"] = []
return results
def add_24h_projection(results_dict):
for book_name, results in results_dict.items():
num_results = len(results)
if num_results > 0:
time_interval = timedelta(minutes=24 * 60 / num_results)
current_time = datetime.min.time()
for i in range(num_results):
next_time = (datetime.combine(datetime.min, current_time) + time_interval).time()
time_range_str = f"{current_time.strftime('%H:%M')}-{next_time.strftime('%H:%M')}"
results[i]['24h Projection'] = time_range_str
current_time = next_time
return results_dict
def add_monthly_projection(results_dict, selected_date):
if selected_date is None:
return results_dict
for book_name, results in results_dict.items():
num_results = len(results)
if num_results > 0:
days_in_month = calendar.monthrange(selected_date.year, selected_date.month)[1]
total_seconds = (days_in_month - 1) * 24 * 3600
seconds_interval = total_seconds / num_results
start_datetime = datetime(selected_date.year, selected_date.month, 1)
current_datetime = start_datetime
for i in range(num_results):
next_datetime = current_datetime + timedelta(seconds=seconds_interval)
current_date = current_datetime.date()
next_date = next_datetime.date()
date_range_str = f"{current_date.strftime('%h %d')} - {next_date.strftime('%h %d')}"
results[i]['Monthly Projection'] = date_range_str
current_datetime = next_datetime
current_date = next_datetime.date()
return results_dict
def add_yearly_projection(results_dict, selected_date):
if selected_date is None:
return results_dict
for book_name, results in results_dict.items():
num_results = len(results)
if num_results > 0:
days_in_year = 366 if calendar.isleap(selected_date.year) else 365
total_seconds = (days_in_year - 1) * 24 * 3600
seconds_interval = total_seconds / num_results
start_datetime = datetime(selected_date.year, 1, 1)
current_datetime = start_datetime
for i in range(num_results):
next_datetime = current_datetime + timedelta(seconds=seconds_interval)
current_date = current_datetime.date()
next_date = next_datetime.date()
date_range_str = f"{current_date.strftime('%b %d')} - {next_date.strftime('%b %d')}"
results[i]['Yearly Projection'] = date_range_str
current_datetime = next_datetime
return results_dict
def sort_results(results):
def parse_time(time_str):
try:
hours, minutes = map(int, time_str.split(':'))
return hours * 60 + minutes
except ValueError:
return 24 * 60
return sorted(results, key=lambda x: (
parse_time(x.get('24h Projection', '23:59').split('-')[0]),
parse_time(x.get('24h Projection', '23:59').split('-')[1])
))
def extract_rounds_combinations():
"""Extracts unique rounds combinations from the database."""
combinations = set()
try:
with sqlite3.connect(ELS_CACHE_DB) as conn:
cursor = conn.cursor()
cursor.execute("SELECT args FROM els_cache")
all_args = cursor.fetchall()
for args_tuple in all_args:
args_str = args_tuple[0]
try:
args_json = json.loads(args_str)
if 'rounds' in args_json:
combinations.add(args_json['rounds'])
except json.JSONDecodeError:
logger.error(f"Could not decode JSON for args: {args_str}")
except sqlite3.Error as e:
logger.error(f"Database error: {e}")
logger.info(f"Found unique rounds combinations: {combinations}")
return ["All"] + sorted(list(combinations))
def update_rounds_dropdown():
new_choices = extract_rounds_combinations()
return new_choices
def perform_gematria_calculation_for_date_range(start_date, end_date):
logger.debug(f"Calculating date gematria for range: {start_date} - {end_date}")
results = {}
delta = timedelta(days=1)
current_date = start_date
while current_date <= end_date:
date_string = current_date.strftime("%Y-%m-%d")
date_words = date_to_words(date_string)
date_gematria = calculate_gematria_sum(date_words, "") # Angepasst, um der Funktion calculate_gematria_sum zu entsprechen
results[date_string] = {
"date_words": date_words,
"date_gematria": date_gematria,
}
current_date += delta
logger.debug(f"Finished calculating date gematria.")
return results
def find_matching_dates(date_gematrias, names, search_journal_sum):
logger.debug(f"Searching for matches with journal sum: {search_journal_sum}")
matching_dates = {}
for name in names:
name_gematria = calculate_gematria_sum(name, "") # Angepasst, um der Funktion calculate_gematria_sum zu entsprechen
target_date_gematria = search_journal_sum - name_gematria if name_gematria is not None else None
logger.debug(f"Name: {name}, Gematria: {name_gematria}, Target Date Gematria: {target_date_gematria}")
if target_date_gematria is not None:
for date_str, date_data in date_gematrias.items():
if date_data["date_gematria"] == target_date_gematria:
if name not in matching_dates:
matching_dates[name] = []
matching_dates[name].append(date_str)
logger.debug(f"Matches for {name}: {matching_dates.get(name, [])}")
return matching_dates
def find_shared_journal_sums(date_gematrias, names):
"""Finds shared journal sums and formats output with names and dates together."""
logger.debug("Calculating shared journal sums...")
shared_sums = {}
name_gematrias = {name: calculate_gematria_sum(name, "") for name in names}
for date_str, date_data in date_gematrias.items():
date_gematria = date_data["date_gematria"]
for name, name_gematria in name_gematrias.items():
journal_sum = date_gematria + name_gematria
journal_sum_str = str(journal_sum) # Konvertiere den Schlüssel (journal_sum) in einen String
if journal_sum_str not in shared_sums:
shared_sums[journal_sum_str] = {}
if name not in shared_sums[journal_sum_str]:
shared_sums[journal_sum_str][name] = []
shared_sums[journal_sum_str][name].append(date_str)
# Filter out sums not shared by at least two names and format output
result = {}
for journal_sum_str, data in shared_sums.items():
if len(data) >= 2:
result[journal_sum_str] = {}
for name, dates in data.items():
result[journal_sum_str][name] = dates
logger.debug(f"Shared Journal Sums: {result}")
return result
def calculate_and_find_dates(start_date, end_date, names_input, search_journal_sum, find_shared=False):
names = [n.strip() for n in names_input.split("\n") if n.strip()]
date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)
if find_shared:
shared_sums = find_shared_journal_sums(date_gematrias, names)
return None, shared_sums
else:
matching_dates = find_matching_dates(date_gematrias, names, int(search_journal_sum))
return matching_dates, None
# --- Main Gradio App ---
with gr.Blocks() as app:
with gr.Tab("ELS Search"):
with gr.Column():
with gr.Row():
tlang = create_language_dropdown("Target Language for Result Translation", default_value='english')
selected_date = Calendar(type="datetime", label="Date to investigate (optional)",
info="Pick a date from the calendar")
use_day = gr.Checkbox(label="Use Day", info="Check to include day in search", value=True)
use_month = gr.Checkbox(label="Use Month", info="Check to include month in search", value=True)
use_year = gr.Checkbox(label="Use Year", info="Check to include year in search", value=True)
date_language_input = create_language_dropdown(
"Language of the person/topic (optional) (Date Word Language)", default_value='english')
with gr.Row():
gematria_text = gr.Textbox(label="Name and/or Topic (required)",
value="Hans Albert Einstein Mileva Marity-Einstein")
date_words_output = gr.Textbox(label="Date in Words Translated (optional)")
gematria_result = gr.Number(label="Journal Sum")
# with gr.Row():
with gr.Row():
step = gr.Number(label="Jump Width (Steps) for ELS")
float_step = gr.Number(visible=False, value=1)
half_step_btn = gr.Button("Steps / 2")
double_step_btn = gr.Button("Steps * 2")
with gr.Column():
round_x = gr.Number(label="Round (1)", value=1)
round_y = gr.Number(label="Round (2)", value=-1)
rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
with gr.Row():
include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
include_hindu_chk = gr.Checkbox(label="Include Rigveda", value=False)
include_tripitaka_chk = gr.Checkbox(label="Include Tripitaka", value=False)
strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
translate_btn = gr.Button("Search with ELS")
# --- Output Components ---
markdown_output = gr.Dataframe(label="ELS Results")
most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
json_output = gr.JSON(label="JSON Output")
with gr.Tab("Cache Database Search"):
with gr.Column():
with gr.Row():
main_book_filter = gr.Dropdown(label="Filter by Main Book",
choices=["All", "Torah", "Bible", "Quran", "Rig Veda", "Tripitaka"],
value="Torah")
# Keine choices hier, nur das Label und den Initialwert
rounds_filter = gr.Dropdown(label="Filter by Rounds", allow_custom_value=True, value="1,-1")
with gr.Row():
search_type = gr.Radio(label="Search by",
choices=["Text in result_text", "Gematria Sum in results"],
value="Text in result_text")
with gr.Row():
search_mode = gr.Radio(label="Search Mode",
choices=["Exact Search", "Contains Word"],
value="Contains Word")
with gr.Row():
search_term = gr.Textbox(label="Search Term", visible=True)
gematria_sum_search = gr.Number(label="Gematria Sum", visible=False)
with gr.Row():
search_db_btn = gr.Button("Search Cache Database")
with gr.Row():
cache_search_results = gr.JSON(label="Cache Search Results")
def update_search_components(search_type):
if search_type == "Text in result_text":
return gr.Textbox.update(visible=True), gr.Number.update(visible=False)
else:
return gr.Textbox.update(visible=False), gr.Number.update(visible=True)
def search_cache_database(search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter, search_mode):
"""Searches the cache database based on the selected filters and search term."""
results = []
search_term = strip_diacritics(search_term)
if main_book_filter == "All" and rounds_filter == "All" and not search_term and not gematria_sum_search:
return results
try:
with sqlite3.connect(ELS_CACHE_DB) as conn:
cursor = conn.cursor()
if search_type == "Text in result_text":
# Optimization: If only main_book_filter is selected, don't perform a full search
if main_book_filter != "All" and rounds_filter == "All" and not search_term:
return results
cursor.execute("SELECT * FROM els_cache")
all_results = cursor.fetchall()
columns = [desc[0] for desc in cursor.description]
for row in all_results:
row_dict = dict(zip(columns, row))
args_dict = json.loads(row_dict['args'])
function_name = row_dict['function_name']
# Function name filtering
include_result = False
if main_book_filter == "All":
include_result = True
elif main_book_filter == "Torah" and function_name == "torah.process_json_files":
include_result = True
elif main_book_filter == "Bible" and function_name == "bible.process_json_files":
include_result = True
elif main_book_filter == "Quran" and function_name == "quran.process_json_files":
include_result = True
elif main_book_filter == "Rig Veda" and function_name == "hindu.process_json_files":
include_result = True
elif main_book_filter == "Tripitaka" and function_name == "tripitaka.process_json_files":
include_result = True
if not include_result:
continue
# Rounds filtering
if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
continue
try:
results_json = json.loads(row_dict['results'])
for result_entry in results_json:
if 'result_text' in result_entry:
if search_mode == "Exact Search" and search_term == result_entry['result_text']:
entry = {
'function_name': function_name,
'step': args_dict.get('step'),
'rounds': args_dict.get('rounds'),
'result': result_entry
}
results.append(entry)
elif search_mode == "Contains Word" and search_term in result_entry['result_text']:
entry = {
'function_name': function_name,
'step': args_dict.get('step'),
'rounds': args_dict.get('rounds'),
'result': result_entry
}
results.append(entry)
except (json.JSONDecodeError, TypeError) as e:
logger.error(f"Error processing row: {e}")
continue
elif search_type == "Gematria Sum in results":
# Optimization: If only main_book_filter is selected, don't perform a full search
if main_book_filter != "All" and rounds_filter == "All" and not gematria_sum_search:
return results
if not isinstance(gematria_sum_search, (int, float)):
return results
cursor.execute("SELECT * FROM els_cache")
all_results = cursor.fetchall()
columns = [desc[0] for desc in cursor.description]
for row in all_results:
row_dict = dict(zip(columns, row))
args_dict = json.loads(row_dict['args'])
function_name = row_dict['function_name']
# Function name filtering
include_result = False
if main_book_filter == "All":
include_result = True
elif main_book_filter == "Torah" and function_name == "torah.process_json_files":
include_result = True
elif main_book_filter == "Bible" and function_name == "bible.process_json_files":
include_result = True
elif main_book_filter == "Quran" and function_name == "quran.process_json_files":
include_result = True
elif main_book_filter == "Rig Veda" and function_name == "hindu.process_json_files":
include_result = True
elif main_book_filter == "Tripitaka" and function_name == "tripitaka.process_json_files":
include_result = True
if not include_result:
continue
# Rounds filtering
if rounds_filter != "All" and args_dict.get('rounds') != rounds_filter:
continue
try:
results_json = json.loads(row_dict['results'])
for result_entry in results_json:
if 'result_sum' in result_entry and result_entry[
'result_sum'] == gematria_sum_search:
entry = {
'function_name': function_name,
'step': args_dict.get('step'),
'rounds': args_dict.get('rounds'),
'result': result_entry
}
results.append(entry)
except (json.JSONDecodeError, TypeError) as e:
logger.error(f"Error processing row: {e}")
continue
# Sort results by gematria sum
results.sort(
key=lambda x: x['result']['result_sum'] if 'result' in x and 'result_sum' in x['result'] else 0)
return results
except sqlite3.Error as e:
logger.error(f"Database error: {e}")
return []
def update_search_components(search_type):
"""Updates the visibility of the search term and gematria sum input fields."""
if search_type == "Text in result_text":
return {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}
else:
return {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"}
with gr.Tab("Date Range Journal Sum Search"):
with gr.Row():
start_date_jr = Calendar(type="datetime", label="Start Date")
end_date_jr = Calendar(type="datetime", label="End Date")
with gr.Row():
names_input_jr = gr.Textbox(label="Names (one per line)", lines=5)
search_sum_jr = gr.Number(label="Search Journal Sum", precision=0)
with gr.Row():
calculate_btn_jr = gr.Button("Search Journal Sum")
shared_sums_btn_jr = gr.Button("Find Shared Journal Sums")
matching_dates_output_jr = gr.JSON(label="Matching Dates")
shared_sums_output_jr = gr.JSON(label="Shared Journal Sums")
calculate_btn_jr.click(
lambda start_date, end_date, names_input, search_sum: calculate_and_find_dates(
start_date, end_date, names_input, search_sum, find_shared=False),
inputs=[start_date_jr, end_date_jr, names_input_jr, search_sum_jr],
outputs=[matching_dates_output_jr, shared_sums_output_jr]
)
shared_sums_btn_jr.click(
lambda start_date, end_date, names_input: calculate_and_find_dates(
start_date, end_date, names_input, 0, find_shared=True),
inputs=[start_date_jr, end_date_jr, names_input_jr],
outputs=[matching_dates_output_jr, shared_sums_output_jr]
)
with gr.Tab("Date Range ELS Search"):
with gr.Row():
start_date_els = Calendar(type="datetime", label="Start Date")
end_date_els = Calendar(type="datetime", label="End Date")
with gr.Row():
names_input_els = gr.Textbox(label="Names (one per line)", lines=5)
with gr.Row():
search_type_els = gr.Radio(
label="Search by",
choices=["Text in result_text", "Gematria Sum in results"],
value="Text in result_text"
)
with gr.Row():
search_mode_els = gr.Radio(
label="Search Mode",
choices=["Exact Search", "Contains Word"],
value="Contains Word"
)
with gr.Row():
search_term_els = gr.Textbox(label="Search Term", visible=True)
gematria_sum_search_els = gr.Number(label="Gematria Sum", visible=False)
with gr.Row():
include_torah_chk_els = gr.Checkbox(label="Include Torah", value=True)
include_bible_chk_els = gr.Checkbox(label="Include Bible", value=True)
include_quran_chk_els = gr.Checkbox(label="Include Quran", value=True)
include_hindu_chk_els = gr.Checkbox(label="Include Rigveda", value=False)
include_tripitaka_chk_els = gr.Checkbox(label="Include Tripitaka", value=False)
with gr.Row():
translate_results_chk_els = gr.Checkbox(label="Translate Results to English", value=False)
with gr.Row():
sub_oscillation_search_chk_els = gr.Checkbox(label="Search in Sub-Oscillations", value=False) # Neue Checkbox
sub_oscillation_level_els = gr.Number(label="Sub-Oscillation Level (0 = off)", precision=0, value=1)
with gr.Row():
perform_search_btn_els = gr.Button("Perform Search")
filtered_results_output_els = gr.JSON(label="Filtered Results")
# Funktionen zur Aktualisierung der Sichtbarkeit der Sucheingabefelder
def update_search_components_els(search_type):
if search_type == "Text in result_text":
return gr.Textbox(visible=True), gr.Number(visible=False)
else:
return gr.Textbox(visible=False), gr.Number(visible=True)
search_type_els.change(
fn=update_search_components_els,
inputs=[search_type_els],
outputs=[search_term_els, gematria_sum_search_els]
)
# Hauptfunktion für den vierten Tab
def perform_date_range_els_search(start_date, end_date, names_input, search_type, search_term, gematria_sum_search, search_mode, include_torah, include_bible, include_quran, include_hindu, include_tripitaka, translate_results, sub_oscillation_search, sub_oscillation_level):
names = [n.strip() for n in names_input.split("\n") if n.strip()]
date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)
search_term = strip_diacritics(search_term)
# Zwischenergebnisse mit Datum, Namen und Gematria-Summe speichern
intermediate_results = []
for date_str, date_data in date_gematrias.items():
for name in names:
name_gematria = calculate_gematria_sum(name, "")
combined_gematria_sum = date_data["date_gematria"] + name_gematria
intermediate_results.append(
{"date": date_str, "name": name, "gematria_sum": combined_gematria_sum}
)
# Ergebnisse nach Datum sortieren
intermediate_results.sort(key=lambda x: x["date"])
all_results = []
for intermediate_result in intermediate_results:
date_str = intermediate_result["date"]
name = intermediate_result["name"]
initial_gematria_sum = intermediate_result["gematria_sum"]
# Basis-Suche durchführen
base_results = perform_els_search_for_gematria_sum(
initial_gematria_sum, include_torah, include_bible, include_quran, include_hindu, include_tripitaka
)
# Nur ausgewählte Bücher für die Sub-Oszillationen-Suche berücksichtigen
selected_books_results = {}
if include_torah:
selected_books_results["Torah"] = base_results.get("Torah", [])
if include_bible:
selected_books_results["Bible"] = base_results.get("Bible", [])
if include_quran:
selected_books_results["Quran"] = base_results.get("Quran", [])
if include_hindu:
selected_books_results["Rig Veda"] = base_results.get("Rig Veda", [])
if include_tripitaka:
selected_books_results["Tripitaka"] = base_results.get("Tripitaka", [])
# Speichere die base_results in einer Liste
base_results_list = []
for book_name, book_results in selected_books_results.items():
for result in book_results:
base_results_list.append({'book': book_name, 'result': result})
# Sub-Oszillationen-Suche, falls aktiviert und Level > 0
if sub_oscillation_search and sub_oscillation_level > 0:
base_results_list = perform_sub_oscillation_search(
base_results_list, initial_gematria_sum, sub_oscillation_level,
include_torah, include_bible, include_quran, include_hindu, include_tripitaka
)
# Umwandlung der erweiterten base_results_list in base_results
base_results = {}
for result_entry in base_results_list:
book_name = result_entry['book']
if book_name not in base_results:
base_results[book_name] = []
base_results[book_name].append(result_entry['result'])
# Filterung und Hinzufügen der Ergebnisse zu all_results
filtered_results = filter_and_format_results(
base_results, search_type, search_term, gematria_sum_search,
search_mode, date_str, name, initial_gematria_sum, sub_oscillation_level, base_results_list
)
all_results.extend(filtered_results)
# Übersetzung der Ergebnisse, falls angefordert
if translate_results:
for result_entry in all_results:
try:
text_to_translate = result_entry["result"]["result_text"]
source_lang = result_entry["result"].get("source_language", "auto")
translated_text = translation_utils.get_translation(text_to_translate, "en", source_lang)[0]
result_entry["result"]["translated_text"] = translated_text
except KeyError as e:
logger.error(f"KeyError translating result_text: {e}")
return all_results
# Hilfsfunktion zum Durchführen der ELS-Suche für eine gegebene Gematria-Summe
def perform_els_search_for_gematria_sum(gematria_sum, include_torah, include_bible, include_quran, include_hindu, include_tripitaka):
return perform_els_search(
step=gematria_sum,
rounds_combination="1,-1",
tlang="english",
strip_spaces=True,
strip_in_braces=True,
strip_diacritics_chk=True,
include_torah=include_torah,
include_bible=include_bible,
include_quran=include_quran,
include_hindu=include_hindu,
include_tripitaka=include_tripitaka
)
# Hilfsfunktion zum Filtern und Formatieren der Ergebnisse
def filter_and_format_results(results, search_type, search_term, gematria_sum_search, search_mode, date_str, name, initial_gematria_sum, sub_oscillation_level, base_results_list):
search_term=strip_diacritics(search_term)
filtered_results = []
for book_name, book_results in results.items():
if book_results:
for result in book_results:
try:
result_text = result['result_text']
result_sum = result['result_sum']
# Finde die verwendete Sub-Oszillation heraus
sub_oscillation_sums = []
current_gematria_sum = initial_gematria_sum
if sub_oscillation_level > 0:
for i in range(1, sub_oscillation_level + 1):
for base_result_entry_tuple in base_results_list:
base_result_entry = dict(base_result_entry_tuple)
if result.get('result_sum') == base_result_entry['result'].get('result_sum') and result.get('result_text') == base_result_entry['result'].get('result_text'):
sub_gematria_sum = base_result_entry.get('sub_gematria_sum')
if sub_gematria_sum:
sub_oscillation_sums.append(sub_gematria_sum)
current_gematria_sum += sub_gematria_sum
break # Innere Schleife beenden, da das passende Ergebnis gefunden wurde
else:
continue # Äußere Schleife fortsetzen, wenn kein passendes Ergebnis gefunden wurde
break
# Sub-Oszillation Summen an den Namen anhängen
sub_oscillation_text = ""
if sub_oscillation_sums:
sub_oscillation_text = " + " + " + ".join(map(str, sub_oscillation_sums))
# Filtern der Ergebnisse
if search_type == "Text in result_text":
if search_mode == "Exact Search" and search_term == result_text:
filtered_results.append(
{
"date": date_str,
"name": f"{name}{sub_oscillation_text}",
"gematria_sum": current_gematria_sum,
"book": book_name,
"result": result
}
)
elif search_mode == "Contains Word" and search_term in result_text:
filtered_results.append(
{
"date": date_str,
"name": f"{name}{sub_oscillation_text}",
"gematria_sum": current_gematria_sum,
"book": book_name,
"result": result
}
)
elif search_type == "Gematria Sum in results":
if result_sum == gematria_sum_search:
filtered_results.append(
{
"date": date_str,
"name": f"{name}{sub_oscillation_text}",
"gematria_sum": current_gematria_sum,
"book": book_name,
"result": result
}
)
except KeyError as e:
logger.error(f"KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
continue
return filtered_results
def perform_date_range_els_search(start_date, end_date, names_input, search_type, search_term, gematria_sum_search, search_mode, include_torah, include_bible, include_quran, include_hindu, include_tripitaka, translate_results, sub_oscillation_search, sub_oscillation_level):
    """Run the date-range ELS search for every (date, name) pair.

    For each date in [start_date, end_date] and each non-empty name (one per
    line in names_input), the date gematria and the name gematria are summed,
    an ELS search is run for that sum over the selected corpora, optionally
    extended by sub-oscillation rounds, then filtered/formatted and optionally
    translated to English.  Returns the flat list produced by
    filter_and_format_results (one dict per hit).
    """
    names = [n.strip() for n in names_input.split("\n") if n.strip()]
    date_gematrias = perform_gematria_calculation_for_date_range(start_date, end_date)
    # Collect intermediate results: one entry per (date, name) with the combined sum.
    intermediate_results = []
    for date_str, date_data in date_gematrias.items():
        for name in names:
            name_gematria = calculate_gematria_sum(name, "")
            combined_gematria_sum = date_data["date_gematria"] + name_gematria
            intermediate_results.append(
                {"date": date_str, "name": name, "gematria_sum": combined_gematria_sum}
            )
    # Sort the pairs chronologically (string sort on date_str; assumes the
    # keys are ISO-like so lexicographic order == chronological — TODO confirm).
    intermediate_results.sort(key=lambda x: x["date"])
    all_results = []
    for intermediate_result in intermediate_results:
        date_str = intermediate_result["date"]
        name = intermediate_result["name"]
        initial_gematria_sum = intermediate_result["gematria_sum"]
        # Base ELS search for the combined date+name gematria sum.
        base_results = perform_els_search_for_gematria_sum(
            initial_gematria_sum, include_torah, include_bible, include_quran, include_hindu, include_tripitaka
        )
        # Only the books the user selected feed the sub-oscillation search.
        selected_books_results = {}
        if include_torah:
            selected_books_results["Torah"] = base_results.get("Torah", [])
        if include_bible:
            selected_books_results["Bible"] = base_results.get("Bible", [])
        if include_quran:
            selected_books_results["Quran"] = base_results.get("Quran", [])
        if include_hindu:
            selected_books_results["Rig Veda"] = base_results.get("Rig Veda", [])
        if include_tripitaka:
            selected_books_results["Tripitaka"] = base_results.get("Tripitaka", [])
        # Flatten base_results into a list of sorted-item tuples.  Sorting the
        # two-key dict never compares the values ('book' < 'result'), so this
        # is safe even though the result payloads are unorderable dicts.
        base_results_list = []
        for book_name, book_results in selected_books_results.items():
            for result in book_results:
                base_results_list.append(tuple(sorted({'book': book_name, 'result': result}.items())))
        # Sub-oscillation search, if enabled and level > 0 (may append new
        # entries carrying a 'sub_gematria_sum' key).
        if sub_oscillation_search and sub_oscillation_level > 0:
            base_results_list = perform_sub_oscillation_search(
                base_results_list, initial_gematria_sum, sub_oscillation_level,
                include_torah, include_bible, include_quran, include_hindu, include_tripitaka
            )
        # Convert the (possibly extended) list back into a book -> results dict.
        base_results = {}
        for result_entry_tuple in base_results_list:
            result_entry = dict(result_entry_tuple)  # convert back to a dictionary
            book_name = result_entry['book']
            if book_name not in base_results:
                base_results[book_name] = []
            base_results[book_name].append(result_entry['result'])
        # Filter by search term / gematria sum and attach date/name metadata.
        filtered_results = filter_and_format_results(
            base_results, search_type, search_term, gematria_sum_search,
            search_mode, date_str, name, initial_gematria_sum, sub_oscillation_level, base_results_list
        )
        all_results.extend(filtered_results)
    # Translate the hit texts to English, if requested.
    if translate_results:
        for result_entry in all_results:
            try:
                text_to_translate = result_entry["result"]["result_text"]
                source_lang = result_entry["result"].get("source_language", "auto")
                translated_text = translation_utils.get_translation(text_to_translate, "en", source_lang)[0]
                result_entry["result"]["translated_text"] = translated_text
            except KeyError as e:
                logger.error(f"KeyError translating result_text: {e}")
    return all_results
def remove_duplicates(dict_list):
    """Return dict_list with duplicate entries removed, preserving order.

    Each item may be a dict (typically {'book': ..., 'result': ...}) or a
    tuple of (key, value) pairs; tuples are converted to dicts, so the
    returned list always contains dicts.

    The identity fingerprint is built from the top-level items sorted by
    key, so two dicts with identical content but different insertion order
    now count as duplicates (the previous repr(item) fingerprint was
    key-order-sensitive and let such duplicates through).  Keys are unique,
    so sorted() never compares the — possibly unorderable — values; nested
    dict values are still repr()'d as-is (only the top level is
    canonicalized).
    """
    seen = set()
    unique_list = []
    for item in dict_list:
        # Normalize tuple-of-pairs entries to plain dicts.
        if isinstance(item, tuple):
            item = dict(item)
        # Canonical, key-order-independent fingerprint of the entry.
        item_id = repr(sorted(item.items()))
        if item_id not in seen:
            seen.add(item_id)
            unique_list.append(item)
    return unique_list
def perform_sub_oscillation_search(base_results_list, initial_gematria_sum, level,
                                   include_torah, include_bible, include_quran,
                                   include_hindu, include_tripitaka):
    """Run `level` rounds of sub-oscillation searches.

    Each round takes every known entry, adds its result's gematria sum to
    the initial sum, searches the selected corpora again with the combined
    sum, and appends the new hits (tagged with their 'sub_gematria_sum')
    to the working list.  Duplicates are removed between rounds via
    remove_duplicates(), then the function recurses one level deeper.
    Returns the merged list of entry dicts.
    """
    if level == 0:
        return base_results_list

    # Normalize every entry to a dict: tuples come from the caller's
    # hashable representation; anything else is wrapped defensively.
    normalized = []
    for raw_entry in base_results_list:
        if isinstance(raw_entry, tuple):
            normalized.append(dict(raw_entry))
        elif isinstance(raw_entry, dict):
            normalized.append(raw_entry)
        else:
            normalized.append({"unknown": raw_entry})

    # Generate this round's new entries.
    discovered = []
    for entry in normalized:
        oscillation_sum = entry['result']['result_sum']
        search_sum = initial_gematria_sum + oscillation_sum
        hits = perform_els_search_for_gematria_sum(
            search_sum,
            include_torah, include_bible, include_quran, include_hindu, include_tripitaka
        )
        for hit_book, hit_list in hits.items():
            discovered.extend(
                # keep the sub-sum so callers can reconstruct the chain
                {'book': hit_book, 'result': hit, 'sub_gematria_sum': oscillation_sum}
                for hit in hit_list
            )

    # Merge old + new and drop duplicates before going one level deeper.
    merged = remove_duplicates(normalized + discovered)
    return perform_sub_oscillation_search(
        merged,
        initial_gematria_sum,
        level - 1,
        include_torah,
        include_bible,
        include_quran,
        include_hindu,
        include_tripitaka
    )
# Wire the date-range ELS search button: all UI inputs feed the search and
# the (possibly sub-oscillated / translated) hits go to the results pane.
perform_search_btn_els.click(
    perform_date_range_els_search,
    inputs=[start_date_els, end_date_els, names_input_els, search_type_els, search_term_els, gematria_sum_search_els, search_mode_els, include_torah_chk_els, include_bible_chk_els, include_quran_chk_els, include_hindu_chk_els, include_tripitaka_chk_els, translate_results_chk_els, sub_oscillation_search_chk_els, sub_oscillation_level_els],
    outputs=[filtered_results_output_els]
)
# --- Event Handlers ---
# Switching the search type toggles which input widget (text term vs.
# gematria sum) is active.
search_type.change(
    fn=update_search_components,
    inputs=[search_type],
    outputs=[search_term, gematria_sum_search]
)
# Query the ELS cache database with the current filter settings.
search_db_btn.click(
    fn=search_cache_database,
    inputs=[search_type, search_term, gematria_sum_search, main_book_filter, rounds_filter, search_mode],
    outputs=cache_search_results
)
def update_rounds_choices():
    # Refresh the rounds-filter dropdown from the distinct round combinations
    # present in the cache; uses gr.update, not gr.Dropdown.update (current Gradio API).
    return gr.update(choices=extract_rounds_combinations())
# Populate the rounds filter once at app start ...
app.load(fn=update_rounds_choices, inputs=None, outputs=rounds_filter)
# ... and refresh it whenever the main-book filter changes.
main_book_filter.change(
    fn=update_rounds_choices,
    inputs=None,  # No input needed here
    outputs=rounds_filter
)
# rest of the handlers
def update_date_words(selected_date, date_language_input, use_day, use_month, use_year):
    """Render the selected date as words, translated to the chosen language.

    The day/month/year checkboxes control which components are used.  With
    none checked the full date is translated directly; with partial
    selections a "YYYY-M" or "YYYY" string is spelled out first.  Returns
    "" when no date is set or the selection cannot form a date (e.g. a day
    without its month/year).
    """
    if selected_date is None:
        return ""
    # No component checked at all: translate the complete date as-is.
    if not (use_year or use_month or use_day):
        return translate_date_to_words(selected_date, date_language_input)

    year = selected_date.year if use_year else None
    month = selected_date.month if use_month else None
    day = selected_date.day if use_day else None

    if None not in (year, month, day):
        date_obj = selected_date
    elif year is not None and month is not None:
        date_obj = str(f"{year}-{month}")
    elif year is not None:
        date_obj = str(f"{year}")
    else:
        # A day alone (without month/year) cannot form a date string.
        return ""

    date_in_words = date_to_words(date_obj)
    date_translator = GoogleTranslator(source='auto', target=date_language_input)
    return custom_normalize(date_translator.translate(date_in_words))
def update_journal_sum(gematria_text, date_words_output):
    """Recompute the combined gematria sum and mirror it into all three
    outputs (result display, integer step, float step)."""
    total = calculate_gematria_sum(gematria_text, date_words_output)
    return total, total, total
def update_rounds_combination(round_x, round_y):
    """Join the two round spinners into the 'x,y' string the search expects.

    Both values are truncated to integers first (the spinners hand over floats).
    """
    x, y = int(round_x), int(round_y)
    return f"{x},{y}"
def update_step_half(float_step):
    """Halve the ELS step.

    Returns (ceil of the half for the integer step widget, the exact half
    for the float tracker so repeated halving stays precise).
    """
    halved = float_step / 2
    return math.ceil(halved), halved
def update_step_double(float_step):
    """Double the ELS step.

    Returns (ceil of the doubled value for the integer step widget, the
    exact doubled value for the float tracker).
    """
    doubled = float_step * 2
    return math.ceil(doubled), doubled
def find_closest_phrase(target_phrase, phrases):
    """Return the phrase most similar to target_phrase, or None if no match.

    phrases is an iterable of 6-tuples whose first element is the phrase
    text (the remaining DB columns are ignored).  The score is a 0-100
    string-similarity ratio minus a penalty of one point per word-count
    difference; the best-scoring phrase wins (ties keep the first seen).

    Bug fix: the original called fuzz.ratio(), but the fuzzywuzzy import
    was removed from this file, so any call raised NameError.  The stdlib
    difflib.SequenceMatcher ratio (scaled to 0-100) is used instead.
    """
    from difflib import SequenceMatcher  # stdlib replacement for fuzz.ratio

    best_match = None
    best_score = 0
    logging.debug(f"Target phrase for similarity search: {target_phrase}")
    target_word_count = len(target_phrase.split())  # hoisted loop invariant
    for phrase, _, _, _, _, _ in phrases:
        word_length_diff = abs(target_word_count - len(phrase.split()))
        # 0-100 integer ratio, the same scale fuzz.ratio used.
        similarity_score = int(round(100 * SequenceMatcher(None, target_phrase, phrase).ratio()))
        combined_score = similarity_score - word_length_diff
        logging.debug(f"Comparing with phrase: {phrase}")
        logging.debug(
            f"Word Length Difference: {word_length_diff}, Similarity Score: {similarity_score}, Combined Score: {combined_score}")
        if combined_score > best_score:
            best_score = combined_score
            best_match = phrase
    logging.debug(f"Closest phrase found: {best_match} with score: {best_score}")
    return best_match
def perform_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                   include_torah, include_bible, include_quran, include_hindu, include_tripitaka, gematria_text,
                   date_words_output, selected_date):
    """Main journal search pipeline for the translate button.

    Runs the ELS search over the selected corpora, looks up the most
    frequent matching phrase per result in the gematria DB, batch-translates
    phrases and result texts to the target language, adds 24h/monthly/yearly
    projections, and returns (results DataFrame, per-book most-frequent-phrase
    summary string, JSON-serializable output dict).
    """
    els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                                     strip_diacritics_chk, include_torah, include_bible, include_quran,
                                     include_hindu,
                                     include_tripitaka)
    most_frequent_phrases = {}
    combined_and_sorted_results = []
    for book_name, book_results in els_results.items():
        if book_results:
            most_frequent_phrases[book_name] = ""
            for result in book_results:
                try:
                    gematria_sum = calculate_gematria(result['result_text'])
                    max_words = len(result['result_text'].split())
                    matching_phrases = search_gematria_in_db(gematria_sum, max_words)
                    # Widen the phrase-length window until something matches
                    # (or the hard cap is hit).
                    max_words_limit = 20
                    while not matching_phrases and max_words < max_words_limit:
                        max_words += 1
                        matching_phrases = search_gematria_in_db(gematria_sum, max_words)
                    if matching_phrases:
                        most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
                        most_frequent_phrases[book_name] = most_frequent_phrase
                    else:
                        # closest_phrase = find_closest_phrase(result['result_text'],
                        #                                      search_gematria_in_db(gematria_sum, max_words_limit))  # Removed fuzzywuzzy
                        most_frequent_phrases[book_name] = ""  # closest_phrase or ""
                    # NOTE: only the last result's phrase per book survives here.
                    result['Most Frequent Phrase'] = most_frequent_phrases[book_name]
                    # Turn a bare chapter/book number into "BookName N.".
                    if 'book' in result:
                        if isinstance(result['book'], int):
                            result['book'] = f"{book_name} {result['book']}."
                    combined_and_sorted_results.append(result)
                except KeyError as e:
                    print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
                    continue
    # Map the long language name to its short code; fall back to English.
    selected_language_long = tlang
    tlang_short = LANGUAGES_SUPPORTED.get(selected_language_long)
    if tlang_short is None:
        tlang_short = "en"
        logger.warning(f"Unsupported language selected: {selected_language_long}. Defaulting to English (en).")
    # Batch-translate DB phrases (assumed Hebrew) and result texts.
    phrases_to_translate = []
    phrases_source_langs = []
    results_to_translate = []
    results_source_langs = []
    for result in combined_and_sorted_results:
        phrases_to_translate.append(result.get('Most Frequent Phrase', ''))
        phrases_source_langs.append("he")
        results_to_translate.append(result.get('result_text', ''))
        results_source_langs.append(result.get("source_language", "auto"))
    translated_phrases = translation_utils.batch_translate(phrases_to_translate, tlang_short,
                                                           phrases_source_langs)
    translated_result_texts = translation_utils.batch_translate(results_to_translate, tlang_short,
                                                                results_source_langs)
    for i, result in enumerate(combined_and_sorted_results):
        result['translated_text'] = translated_result_texts.get(results_to_translate[i], None)
        result['Translated Most Frequent Phrase'] = translated_phrases.get(phrases_to_translate[i], None)
    # Add time-based projection rows, then rebuild the flat sorted list.
    updated_els_results = add_24h_projection(els_results)
    updated_els_results = add_monthly_projection(updated_els_results, selected_date)
    updated_els_results = add_yearly_projection(updated_els_results, selected_date)
    combined_and_sorted_results = []
    for book_results in updated_els_results.values():
        combined_and_sorted_results.extend(book_results)
    combined_and_sorted_results = sort_results(combined_and_sorted_results)
    # 1-based "Result Number" both in the DataFrame and in the raw dicts.
    df = pd.DataFrame(combined_and_sorted_results)
    df.index = range(1, len(df) + 1)
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'Result Number'}, inplace=True)
    for i, result in enumerate(combined_and_sorted_results):
        result['Result Number'] = i + 1
    # Echo the full search configuration into the JSON output for reproducibility.
    search_config = {
        "step": step,
        "rounds_combination": rounds_combination,
        "target_language": tlang,
        "strip_spaces": strip_spaces,
        "strip_in_braces": strip_in_braces,
        "strip_diacritics": strip_diacritics_chk,
        "include_torah": include_torah,
        "include_bible": include_bible,
        "include_quran": include_quran,
        "include_hindu": include_hindu,
        "include_tripitaka": include_tripitaka,
        "gematria_text": gematria_text,
        "date_words": date_words_output
    }
    output_data = {
        "search_configuration": search_config,
        "results": combined_and_sorted_results
    }
    json_data = output_data
    combined_most_frequent = "\n".join(
        f"{book}: {phrase}" for book, phrase in most_frequent_phrases.items())
    return df, combined_most_frequent, json_data
# --- Event Triggers ---
# Keep the "x,y" rounds string in sync with the two round spinners.
round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
# Re-derive the date-in-words text whenever the date or target language changes.
selected_date.change(update_date_words, inputs=[selected_date, date_language_input, use_day, use_month, use_year],
                     outputs=[date_words_output])
date_language_input.change(update_date_words,
                           inputs=[selected_date, date_language_input, use_day, use_month, use_year],
                           outputs=[date_words_output])
# Recompute the journal gematria sum (and both step fields) when either text changes.
gematria_text.change(update_journal_sum, inputs=[gematria_text, date_words_output],
                     outputs=[gematria_result, step, float_step])
date_words_output.change(update_journal_sum, inputs=[gematria_text, date_words_output],
                         outputs=[gematria_result, step, float_step])
# Halve / double the ELS step width.
half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])
# Main search: fills the results table, phrase summary and JSON dump.
translate_btn.click(
    perform_search,
    inputs=[step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk, include_torah_chk,
            include_bible_chk, include_quran_chk, include_hindu_chk, include_tripitaka_chk, gematria_text,
            date_words_output, selected_date],
    outputs=[markdown_output, most_frequent_phrase_output, json_output]
)
# Initialize the date-words field once when the app loads.
app.load(
    update_date_words,
    inputs=[selected_date, date_language_input, use_day, use_month, use_year],
    outputs=[date_words_output]
)
# Toggling any of the day/month/year checkboxes also re-derives the date words.
use_day.change(
    update_date_words,
    inputs=[selected_date, date_language_input, use_day, use_month, use_year],
    outputs=[date_words_output]
)
use_month.change(
    update_date_words,
    inputs=[selected_date, date_language_input, use_day, use_month, use_year],
    outputs=[date_words_output]
)
use_year.change(
    update_date_words,
    inputs=[selected_date, date_language_input, use_day, use_month, use_year],
    outputs=[date_words_output]
)
def checkbox_behavior(use_day_value, use_month_value):
    """Enforce the checkbox hierarchy: checking 'day' forces 'month' on,
    and 'year' is always kept on.  Returns (month_state, year_state)."""
    month_state = True if use_day_value else use_month_value
    return month_state, True
# Re-apply the checkbox hierarchy after either toggle changes.
use_day.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
use_month.change(checkbox_behavior, inputs=[use_day, use_month], outputs=[use_month, use_year])
# Launch the Gradio app locally (no public share link).
if __name__ == "__main__":
    app.launch(share=False)