"""Gradio front end for a date-driven ELS search.

For every day in a selected date range the app turns the date into words,
adds the gematria value of a name/topic to obtain a step width, runs the ELS
search of the ``torah``, ``bible`` and ``quran`` modules with that step, and
annotates each hit with the most frequent phrase of equal gematria value found
in the local SQLite database ``gematria.db``.
"""
import logging

# Logging is configured before the remaining imports, exactly as in the
# original file, so this basicConfig() call is the first one to run.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

import gradio as gr
import torah
import bible
import quran
from utils import number_to_ordinal_word, custom_normalize, date_to_words, translate_date_to_words
from gematria import calculate_gematria, strip_diacritics
import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar
from datetime import datetime, timedelta
import math
import json
import re
import sqlite3
from collections import Counter, defaultdict
from contextlib import closing
from typing import List, Optional, Tuple

# --- Constants ---
DATABASE_FILE = 'gematria.db'
# Upper bound for the phrase length when the database search is widened.
MAX_PHRASE_LENGTH_LIMIT = 20


# --- Database Initialization ---
def initialize_database():
    """Create the ``results`` and ``processed_books`` tables if they are missing.

    The connection is stored in the module-level ``conn``. Queries issued by
    ``search_gematria_in_db`` use their own short-lived connections instead.
    """
    global conn
    conn = sqlite3.connect(DATABASE_FILE)
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
    ''')
    cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
    ''')
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')
    conn.commit()


# --- Initialize Database ---
initialize_database()


# --- Helper Functions (from Network app.py) ---
def flatten_text(text: List) -> str:
    """Join an arbitrarily nested list of strings with single spaces.

    A non-list argument is returned unchanged.
    """
    if isinstance(text, list):
        return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
    return text


def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
    """Return all stored phrases with the given gematria sum and bounded length.

    Args:
        gematria_sum: Value matched against the ``gematria_sum`` column.
        max_words: Inclusive upper bound for the ``phrase_length`` column.

    Returns:
        Rows of ``(words, book, chapter, verse, phrase_length, word_position)``.
    """
    # sqlite3's own context manager only commits or rolls back; closing()
    # makes sure the per-call connection is actually released.
    with closing(sqlite3.connect(DATABASE_FILE)) as connection:
        cursor = connection.cursor()
        cursor.execute('''
            SELECT words, book, chapter, verse, phrase_length, word_position
            FROM results
            WHERE gematria_sum = ? AND phrase_length <= ?
        ''', (gematria_sum, max_words))
        return cursor.fetchall()


def get_most_frequent_phrase(results):
    """Return the ``words`` value occurring most often in ``results``.

    Args:
        results: Rows as returned by ``search_gematria_in_db``.

    Returns:
        The most frequent phrase (first seen wins on ties), or ``None`` when
        ``results`` is empty.
    """
    phrase_counts = Counter(row[0] for row in results)
    if not phrase_counts:
        return None
    return max(phrase_counts, key=phrase_counts.get)


def _find_phrase_for_sum(gematria_sum: int, max_words: int) -> str:
    """Look up the most frequent phrase for a gematria sum, widening the search.

    Starts with phrases of at most ``max_words`` words and raises the bound
    one word at a time up to ``MAX_PHRASE_LENGTH_LIMIT`` until rows are found.
    Returns "" when nothing matches. The original code issued one more query
    at the limit here, but every row it could return is already covered by
    the queries above, so it could only ever come back empty.
    """
    matching_phrases = search_gematria_in_db(gematria_sum, max_words)
    while not matching_phrases and max_words < MAX_PHRASE_LENGTH_LIMIT:
        max_words += 1
        matching_phrases = search_gematria_in_db(gematria_sum, max_words)
    if matching_phrases:
        return get_most_frequent_phrase(matching_phrases)
    return ""


# --- Functions from BOS app.py ---
def create_language_dropdown(label, default_value='en', show_label=True):
    """Build a dropdown whose choices are the translator's supported languages.

    The choices are the keys of the dict returned by
    ``GoogleTranslator.get_supported_languages(as_dict=True)``.
    NOTE(review): callers in this file pass 'english' as the default, so the
    keys are presumably language names rather than codes - confirm.
    """
    languages = GoogleTranslator(source='en', target='en').get_supported_languages(as_dict=True)
    return gr.Dropdown(
        choices=list(languages.keys()),
        label=label,
        value=default_value,
        show_label=show_label
    )


def calculate_gematria_sum(text, date_words) -> Optional[int]:
    """Combine text and date words into one numeric sum.

    Digit runs in the combined input are added as plain integers; the
    remaining text is passed through ``strip_diacritics`` and
    ``calculate_gematria``.

    Returns:
        The total, or ``None`` when both inputs are empty.
    """
    if not (text or date_words):
        return None
    combined_input = f"{text} {date_words}"
    numbers = re.findall(r'\d+', combined_input)
    text_without_numbers = re.sub(r'\d+', '', combined_input)
    number_sum = sum(int(number) for number in numbers)
    text_gematria = calculate_gematria(strip_diacritics(text_without_numbers))
    return text_gematria + number_sum


def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                       merge_results, include_torah, include_bible, include_quran):
    """Run the ELS search over the selected books.

    Args:
        step: Step width handed to each ``process_json_files`` call.
        rounds_combination: Rounds string such as "1,-1".
        tlang: Target language for translation.
        strip_spaces, strip_in_braces, strip_diacritics_chk: Text clean-up
            flags forwarded to the book modules.
        merge_results: Interleave the per-book results instead of
            concatenating them.
        include_torah, include_bible, include_quran: Which sources to search.

    Returns:
        A list of results, or ``None`` when there is nothing to search for
        (no step, step 0, or rounds "0,0").
    """
    # A missing step (calculate_gematria_sum returned None) is treated like 0.
    if step is None or step == 0 or rounds_combination == "0,0":
        return None

    torah_results = []
    bible_results = []
    quran_results = []

    # The checkbox value is forwarded here. The original passed the imported
    # strip_diacritics *function*, which is always truthy, so the user's
    # setting was ignored.
    if include_torah:
        torah_results.extend(
            torah.process_json_files(1, 39, step, rounds_combination, 0, tlang, strip_spaces,
                                     strip_in_braces, strip_diacritics_chk))
    if include_bible:
        bible_results.extend(
            bible.process_json_files(40, 66, step, rounds_combination, 0, tlang, strip_spaces,
                                     strip_in_braces, strip_diacritics_chk))
    if include_quran:
        quran_results.extend(
            quran.process_json_files(1, 114, step, rounds_combination, 0, tlang, strip_spaces,
                                     strip_in_braces, strip_diacritics_chk))

    if not merge_results:
        return torah_results + bible_results + quran_results

    # Interleave: first hit of every source, then the second of each, and so on.
    sources = (torah_results, bible_results, quran_results)
    results = []
    for i in range(max(len(source) for source in sources)):
        for source in sources:
            if i < len(source):
                results.append(source[i])
    return results


def generate_json_dump(start, end, step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                       strip_diacritics_chk, search_phrase, results_df, start_date, end_date):
    """Generates the JSON dump with configuration, date range, and results.

    Returns:
        A pretty-printed JSON string with the keys "Configuration",
        "DateRange" and "Results" (the dataframe as a list of records).
    """
    config = {
        "Start Book": start,
        "End Book": end,
        "Step": step,
        "Rounds": rounds_combination,
        "Target Language": tlang,
        "Strip Spaces": strip_spaces,
        "Strip Text in Braces": strip_in_braces,
        "Strip Diacritics": strip_diacritics_chk,
        "Search Phrase": search_phrase
    }
    result = {
        "Configuration": config,
        "DateRange": {
            "StartDate": start_date.strftime("%Y-%m-%d"),
            "EndDate": end_date.strftime("%Y-%m-%d")
        },
        "Results": json.loads(results_df.to_json(orient='records', force_ascii=False))
    }
    logger.info(f"Generated JSON dump: {result}")
    return json.dumps(result, indent=4, ensure_ascii=False)


def download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk):
    """Downloads the JSON config file with a descriptive name.

    The file is written to the current working directory; its name encodes
    the step, the rounds and one suffix per active strip option.

    Returns:
        The path of the written file.
    """
    filename_suffix = ""
    if strip_spaces:
        filename_suffix += "-stSp"
    if strip_in_braces:
        filename_suffix += "-stBr"
    if strip_diacritics_chk:
        filename_suffix += "-stDc"
    file_path = f"step-{step}-rounds-{rounds_combination}{filename_suffix}.json"  # Include rounds in filename
    with open(file_path, "w", encoding='utf-8') as file:
        file.write(config_json)
    logger.info(f"Downloaded JSON file to: {file_path}")
    return file_path


# --- Main Gradio App ---
# NOTE(review): the source arrived without indentation, so the Row/Column
# nesting below is reconstructed from statement order - confirm the layout.
with gr.Blocks() as app:
    with gr.Row():
        start_date = Calendar(type="datetime", label="Start Date")
        end_date = Calendar(type="datetime", label="End Date")

    with gr.Row():
        tlang = create_language_dropdown("Target Language for Translation", default_value='english')
        date_language_input = create_language_dropdown(
            "Language of the person/topic (optional) (Date Word Language)", default_value='english')

    with gr.Row():
        gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein")
        gematria_result = gr.Number(label="Journal Sum")

    with gr.Row():
        step = gr.Number(label="Jump Width (Steps) for ELS")
        float_step = gr.Number(visible=False, value=1)
        half_step_btn = gr.Button("Steps / 2")
        double_step_btn = gr.Button("Steps * 2")

        with gr.Column():
            round_x = gr.Number(label="Round (1)", value=1)
            round_y = gr.Number(label="Round (2)", value=-1)

        rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")

    with gr.Row():
        include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
        include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
        include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
        merge_results_chk = gr.Checkbox(label="Merge Results (Torah-Bible-Quran)", value=True)

        strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
        strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
        strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)

    translate_btn = gr.Button("Search with ELS")

    # --- Output Components ---
    markdown_output = gr.Dataframe(label="ELS Results")
    most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
    json_output = gr.Textbox(label="JSON Configuration Output")
    json_download_btn = gr.Button("Prepare .json for Download")
    json_file = gr.File(label="Download Config JSON", file_count="single")

    # --- Event Handlers ---
    def update_journal_sum(gematria_text, date_words_output):
        """Return the journal sum three times (callers unpack a triple)."""
        sum_value = calculate_gematria_sum(gematria_text, date_words_output)
        return sum_value, sum_value, sum_value

    def update_rounds_combination(round_x, round_y):
        """Format both round numbers as the "x,y" string used by the search."""
        return f"{int(round_x)},{int(round_y)}"

    def update_step_half(float_step):
        """Halve the hidden float step; the visible step is rounded up."""
        new_step = math.ceil(float_step / 2)
        return new_step, float_step / 2

    def update_step_double(float_step):
        """Double the hidden float step; the visible step is rounded up."""
        new_step = math.ceil(float_step * 2)
        return new_step, float_step * 2

    def perform_search(start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces,
                       strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible,
                       include_quran, gematria_text):
        """Run the ELS and database search for every day in the date range.

        The ``step`` input is replaced per day by that day's journal sum
        (gematria of ``gematria_text`` plus the date words). Each ELS result
        dict gains the keys 'Most Frequent Phrase', 'Date' and 'Date Words'.

        Returns:
            ``(dataframe, most_frequent_phrase, config_json)`` for the three
            output components.
        """
        all_results = []
        one_day = timedelta(days=1)
        current_date = start_date

        while current_date <= end_date:
            date_words_output = translate_date_to_words(current_date, date_language_input)
            journal_sum, _, _ = update_journal_sum(gematria_text, date_words_output)
            step = journal_sum

            # perform_els_search returns None when there is nothing to search
            # (e.g. step 0); treat that as "no results for this day".
            els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                                             strip_diacritics_chk, merge_results, include_torah,
                                             include_bible, include_quran) or []

            # --- Network Search Integration ---
            for result in els_results:
                try:
                    gematria_sum = calculate_gematria(result['result_text'])
                except KeyError as e:
                    print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
                    continue

                max_words = len(result['result_text'].split())

                # Add most frequent phrase, date, and date_words to the result dictionary
                result['Most Frequent Phrase'] = _find_phrase_for_sum(gematria_sum, max_words)
                result['Date'] = current_date.strftime('%Y-%m-%d')
                result['Date Words'] = date_words_output
                all_results.append(result)

            current_date += one_day

        # --- Prepare Dataframe ---
        df = pd.DataFrame(all_results)
        df.index = range(1, len(df) + 1)
        df.reset_index(inplace=True)
        df.rename(columns={'index': 'Result Number'}, inplace=True)

        # Find the most frequent phrase across all dates (first seen wins on
        # ties, which keeps the outcome stable between runs).
        phrase_counts = Counter(result['Most Frequent Phrase'] for result in all_results)
        most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else ""

        # Generate JSON output; the date words are already part of each result.
        search_phrase = f"{gematria_text}"
        config_json = generate_json_dump(1, 180, step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                                         strip_diacritics_chk, search_phrase, df, start_date, end_date)

        return df, most_frequent_phrase, config_json

    def handle_json_download(config_json, step, rounds_combination, strip_spaces, strip_in_braces,
                             strip_diacritics_chk):
        """Handles the download of the JSON config file."""
        return download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces,
                                  strip_diacritics_chk)

    # --- Event Triggers ---
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)

    half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
    double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])

    translate_btn.click(
        perform_search,
        inputs=[start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces,
                strip_in_braces, strip_diacritics_chk, merge_results_chk, include_torah_chk, include_bible_chk,
                include_quran_chk, gematria_text],
        outputs=[markdown_output, most_frequent_phrase_output, json_output]
    )

    json_download_btn.click(
        handle_json_download,
        inputs=[json_output, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk],
        outputs=[json_file]
    )

if __name__ == "__main__":
    app.launch(share=False)