import gradio as gr import json import re import sqlite3 import logging from collections import defaultdict from util import process_json_files from gematria import calculate_gematria from deep_translator import GoogleTranslator, exceptions from urllib.parse import quote_plus # Set up logging logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') # Global variables for database connection, translator, and book names conn = None translator = None book_names = {} def initialize_database(): """Initializes the SQLite database.""" global conn conn = sqlite3.connect('gematria.db') cursor = conn.cursor() # Create tables if they don't exist cursor.execute(''' CREATE TABLE IF NOT EXISTS results ( gematria_sum INTEGER, words TEXT, translation TEXT, book TEXT, -- Store book name directly chapter INTEGER, verse INTEGER, PRIMARY KEY (gematria_sum, book, chapter, verse) ) ''') cursor.execute(''' CREATE TABLE IF NOT EXISTS processed_books ( book TEXT PRIMARY KEY, -- Store book name directly max_phrase_length INTEGER ) ''') conn.commit() logging.info("Database initialized.") def initialize_translator(): """Initializes the Google Translator.""" global translator translator = GoogleTranslator(source='iw', target='en') logging.info("Translator initialized.") def populate_database(start_book, end_book, max_phrase_length=1): """Populates the database with phrases from the Tanach and their Gematria values.""" global conn, book_names logging.info(f"Populating database with books from {start_book} to {end_book}...") cursor = conn.cursor() for book_id in range(start_book, end_book + 1): book_data = process_json_files(book_id, book_id) # Get data for the single book # process_json_files returns a dictionary with book_id as key, # so access the book data directly if book_id in book_data: book_data = book_data[book_id] if 'title' not in book_data or not isinstance(book_data['title'], str): logging.warning(f"Skipping book {book_id} due to missing or invalid 'title' field.") continue title = book_data['title'] book_names[book_id] = title # Check if the book is already processed for this max_phrase_length cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,)) result = cursor.fetchone() if result and result[0] >= max_phrase_length: logging.info(f"Skipping book {title}: Already processed with max_phrase_length {result[0]}") continue logging.info(f"Processing book {title} with max_phrase_length {max_phrase_length}") if 'text' not in book_data or not isinstance(book_data['text'], list): logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.") continue chapters = book_data['text'] for chapter_id, chapter in enumerate(chapters): if not isinstance(chapter, list): logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.") continue for verse_id, verse in enumerate(chapter): verse_text = flatten_text(verse) # Remove text in square brackets verse_text = re.sub(r'\[.*?\]', '', verse_text) verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text) verse_text = re.sub(r" +", " ", verse_text) words = verse_text.split() # Iterate through phrases of different lengths for length in range(1, max_phrase_length + 1): for start in range(len(words) - length + 1): phrase_candidate = " ".join(words[start:start + length]) gematria_sum = calculate_gematria(phrase_candidate.replace(" ", "")) insert_phrase_to_db(gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1) # Mark the book as processed for this max_phrase_length cursor.execute('''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (title, max_phrase_length)) conn.commit() logging.info("Database population complete.") def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse): """Inserts a phrase and its Gematria value into the database.""" global conn cursor = conn.cursor() try: cursor.execute(''' INSERT INTO results (gematria_sum, words, book, chapter, verse) VALUES (?, ?, ?, ?, ?) ''', (gematria_sum, phrase_candidate, book, chapter, verse)) conn.commit() logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}") except sqlite3.IntegrityError: logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}") def get_translation(phrase): """Retrieves or generates the English translation of a Hebrew phrase.""" global translator, conn cursor = conn.cursor() cursor.execute(''' SELECT translation FROM results WHERE words = ? ''', (phrase,)) result = cursor.fetchone() if result and result[0]: return result[0] else: translation = translate_and_store(phrase) cursor.execute(''' UPDATE results SET translation = ? WHERE words = ? ''', (translation, phrase)) conn.commit() return translation def translate_and_store(phrase): """Translates a Hebrew phrase to English using Google Translate and handles potential errors.""" global translator max_retries = 3 retries = 0 while retries < max_retries: try: translation = translator.translate(phrase) logging.debug(f"Translated phrase: {translation}") return translation except (exceptions.TranslationNotFound, exceptions.NotValidPayload, exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e: retries += 1 logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})") logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.") return "[Translation Error]" def search_gematria_in_db(gematria_sum): """Searches the database for phrases with a given Gematria value.""" global conn cursor = conn.cursor() cursor.execute(''' SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ? ''', (gematria_sum,)) results = cursor.fetchall() logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}") return results def gematria_search_interface(phrase): """The main function for the Gradio interface.""" if not phrase.strip(): return "Please enter a phrase." global conn, book_names conn = sqlite3.connect('gematria.db') cursor = conn.cursor() phrase_gematria = calculate_gematria(phrase.replace(" ", "")) logging.info(f"Searching for phrases with Gematria: {phrase_gematria}") matching_phrases = search_gematria_in_db(phrase_gematria) if not matching_phrases: return "No matching phrases found." # Sort results by book, chapter, and verse sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3])) # Group results by book results_by_book = defaultdict(list) for words, book, chapter, verse in sorted_phrases: results_by_book[book].append((words, chapter, verse)) # Format results for display results = [] results.append("
Chapter: {chapter}, Verse: {verse}
Hebrew Phrase: {words}
Translation: {translation}
[See on Bible Gateway]