gematria_date_sums

Running

bartman081523

book:unknown fix

26e6a7b 12 months ago

10.6 kB

	import gradio as gr
	import json
	import re
	import sqlite3
	import logging
	from collections import defaultdict
	from util import process_json_files
	from gematria import calculate_gematria
	from deep_translator import GoogleTranslator, exceptions
	from urllib.parse import quote_plus

	# Module-wide state shared by the functions below. ``conn`` is assigned by
	# initialize_database(), ``translator`` by initialize_translator(), and
	# ``book_names`` (book id -> title) is filled in by populate_database().
	book_names = {}
	translator = None
	conn = None

	# Emit DEBUG-and-above records, each prefixed with a timestamp and level name.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.DEBUG,
	)

	def initialize_database():
	    """Open ``gematria.db`` and make sure the tables this module needs exist.

	    The connection is stored in the module-level ``conn``. Two tables are
	    created when they are missing:

	    * ``results`` - one row per stored phrase occurrence: its Gematria sum,
	      the Hebrew words, an optional English translation, and the
	      book / chapter / verse it was found in.
	    * ``processed_books`` - the largest ``max_phrase_length`` a book has
	      already been loaded with.

	    NOTE: ``CREATE TABLE IF NOT EXISTS`` never alters an existing table, so a
	    ``gematria.db`` created before ``words`` became part of the primary key
	    keeps its old key until the file is rebuilt.
	    """
	    global conn
	    conn = sqlite3.connect('gematria.db')
	    cursor = conn.cursor()

	    # ``words`` must be part of the key: two different phrases in the same
	    # verse can share a Gematria sum, and without it the second phrase is
	    # rejected as a duplicate and never becomes searchable.
	    cursor.execute('''
	        CREATE TABLE IF NOT EXISTS results (
	            gematria_sum INTEGER,
	            words TEXT,
	            translation TEXT,
	            book TEXT, -- Store book name directly
	            chapter INTEGER,
	            verse INTEGER,
	            PRIMARY KEY (gematria_sum, words, book, chapter, verse)
	        )
	    ''')
	    # Translations are looked up and updated by ``words`` alone; index that
	    # column so those statements do not scan the whole table.
	    cursor.execute('''
	        CREATE INDEX IF NOT EXISTS idx_results_words ON results (words)
	    ''')
	    cursor.execute('''
	        CREATE TABLE IF NOT EXISTS processed_books (
	            book TEXT PRIMARY KEY, -- Store book name directly
	            max_phrase_length INTEGER
	        )
	    ''')
	    conn.commit()
	    logging.info("Database initialized.")

	def initialize_translator():
	    """Create the shared Google translator used for phrase translations.

	    The instance is configured with source language code ``'iw'`` and target
	    ``'en'`` and stored in the module-level ``translator``.
	    """
	    global translator
	    google_translator = GoogleTranslator(source='iw', target='en')
	    translator = google_translator
	    logging.info("Translator initialized.")

	def populate_database(start_book, end_book, max_phrase_length=1):
	    """Load phrases from a range of books into the ``results`` table.

	    Args:
	        start_book: First book id to load (inclusive).
	        end_book: Last book id to load (inclusive).
	        max_phrase_length: Longest phrase, in words, to store. Every run of
	            1..max_phrase_length consecutive words of each verse is inserted.

	    A book is skipped when ``processed_books`` already records it with an
	    equal or larger ``max_phrase_length``, or when its data has no string
	    ``title`` or no list ``text``. Each book's title is also recorded in the
	    module-level ``book_names`` under its id.
	    """
	    logging.info(f"Populating database with books from {start_book} to {end_book}...")
	    db_cursor = conn.cursor()

	    # Compiled once; applied to every verse below.
	    bracketed = re.compile(r'\[.*?\]')
	    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")
	    space_run = re.compile(r" +")

	    for book_id in range(start_book, end_book + 1):
	        # Request just this one book; the result is indexed by book id.
	        loaded = process_json_files(book_id, book_id)
	        if book_id not in loaded:
	            continue
	        book = loaded[book_id]

	        if not ('title' in book and isinstance(book['title'], str)):
	            logging.warning(f"Skipping book {book_id} due to missing or invalid 'title' field.")
	            continue
	        title = book['title']
	        book_names[book_id] = title

	        # Nothing to do if an earlier run already covered this phrase length.
	        db_cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,))
	        previous = db_cursor.fetchone()
	        if previous and previous[0] >= max_phrase_length:
	            logging.info(f"Skipping book {title}: Already processed with max_phrase_length {previous[0]}")
	            continue

	        logging.info(f"Processing book {title} with max_phrase_length {max_phrase_length}")

	        if not ('text' in book and isinstance(book['text'], list)):
	            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
	            continue

	        # Chapters and verses are stored 1-based.
	        for chapter_number, chapter in enumerate(book['text'], start=1):
	            if not isinstance(chapter, list):
	                logging.warning(f"Skipping chapter {chapter_number - 1} in book {title} due to invalid format.")
	                continue
	            for verse_number, verse in enumerate(chapter, start=1):
	                # Drop [bracketed] text, then everything except the letters
	                # U+05D0..U+05EA and spaces, then collapse runs of spaces.
	                cleaned = bracketed.sub('', flatten_text(verse))
	                cleaned = non_hebrew.sub("", cleaned)
	                words = space_run.sub(" ", cleaned).split()
	                word_count = len(words)

	                # Slide a window of each allowed length across the verse.
	                for length in range(1, max_phrase_length + 1):
	                    for start in range(word_count - length + 1):
	                        phrase = " ".join(words[start:start + length])
	                        phrase_sum = calculate_gematria(phrase.replace(" ", ""))
	                        insert_phrase_to_db(phrase_sum, phrase, title, chapter_number, verse_number)

	        # Record completion so later runs can skip this book.
	        db_cursor.execute(
	            '''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''',
	            (title, max_phrase_length),
	        )
	        conn.commit()

	    logging.info("Database population complete.")

	def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
	    """Store one phrase occurrence in ``results`` and commit immediately.

	    Args:
	        gematria_sum: Gematria value of the phrase.
	        phrase_candidate: The phrase text, stored in the ``words`` column.
	        book: Book name.
	        chapter: Chapter number.
	        verse: Verse number.

	    An insert that SQLite rejects with ``IntegrityError`` is not raised to
	    the caller; it is only logged at DEBUG level.
	    """
	    row = (gematria_sum, phrase_candidate, book, chapter, verse)
	    try:
	        conn.cursor().execute('''
	            INSERT INTO results (gematria_sum, words, book, chapter, verse)
	            VALUES (?, ?, ?, ?, ?)
	        ''', row)
	        conn.commit()
	    except sqlite3.IntegrityError:
	        logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
	    else:
	        logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")

	def get_translation(phrase):
	    """Return the English translation of a Hebrew phrase, caching it in the DB.

	    Args:
	        phrase: The phrase exactly as stored in ``results.words``.

	    Returns:
	        The cached translation when a usable one is stored for ``phrase``;
	        otherwise whatever ``translate_and_store`` returns, which is the
	        ``"[Translation Error]"`` placeholder when translation fails.

	    A fresh translation is written to every ``results`` row holding the same
	    words. The failure placeholder is never written, and a placeholder
	    stored by an earlier run is ignored, so a failed phrase is retried on
	    the next request instead of being served as an error forever.
	    """
	    failure_marker = "[Translation Error]"
	    cursor = conn.cursor()

	    # Several rows can hold the same words; pick one that actually carries a
	    # usable translation rather than whichever row happens to come first.
	    cursor.execute('''
	        SELECT translation FROM results
	        WHERE words = ?
	          AND translation IS NOT NULL
	          AND translation != ''
	          AND translation != ?
	        LIMIT 1
	    ''', (phrase, failure_marker))
	    cached = cursor.fetchone()
	    if cached:
	        return cached[0]

	    translation = translate_and_store(phrase)
	    if translation and translation != failure_marker:
	        cursor.execute('''
	            UPDATE results
	            SET translation = ?
	            WHERE words = ?
	        ''', (translation, phrase))
	        conn.commit()
	    return translation

	def translate_and_store(phrase):
	    """Translate a Hebrew phrase to English, retrying on failure.

	    Despite its name, this function stores nothing; it only calls the
	    module-level ``translator``.

	    Args:
	        phrase: Text to translate.

	    Returns:
	        The translator's result, or the literal ``"[Translation Error]"``
	        once all three attempts have failed.
	    """
	    max_retries = 3

	    for attempt in range(1, max_retries + 1):
	        try:
	            translation = translator.translate(phrase)
	        # OSError stands in for the network failures raised by ``requests``
	        # (its RequestException derives from IOError/OSError). The module
	        # never imports ``requests``, so naming its exception class here
	        # raised NameError as soon as any translation attempt failed.
	        except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
	                exceptions.ServerException, exceptions.RequestError, OSError) as e:
	            logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({attempt}/{max_retries})")
	        else:
	            logging.debug(f"Translated phrase: {translation}")
	            return translation

	    logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
	    return "[Translation Error]"

	def search_gematria_in_db(gematria_sum):
	    """Fetch every stored phrase whose Gematria value equals ``gematria_sum``.

	    Returns:
	        A list of ``(words, book, chapter, verse)`` rows read through the
	        module-level ``conn``; empty when nothing matches.
	    """
	    query = '''
	        SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
	    '''
	    matches = conn.cursor().execute(query, (gematria_sum,)).fetchall()
	    logging.debug(f"Found {len(matches)} matching phrases for Gematria: {gematria_sum}")
	    return matches

	def gematria_search_interface(phrase):
	    """Gradio handler: list stored phrases sharing the input's Gematria value.

	    Args:
	        phrase: Text entered by the user. Spaces are removed before the
	            Gematria value is calculated.

	    Returns:
	        A plain message when the input is empty or nothing matches;
	        otherwise an HTML string (a ``<style>`` block followed by one card
	        per match, grouped under a heading per book) with the Hebrew phrase,
	        its translation and a Bible Gateway link.

	    The module-level ``conn`` is replaced with a connection opened for this
	    call and is always closed again before returning, including on the
	    early "no matches" return and when an error is raised.
	    """
	    from html import escape  # local import keeps this block self-contained

	    global conn

	    if not phrase or not phrase.strip():
	        return "Please enter a phrase."

	    # NOTE(review): a fresh connection per request is presumably needed
	    # because the handler can run on a different thread than the one that
	    # opened the startup connection, and sqlite3 connections are bound to
	    # their creating thread by default - confirm against the Gradio setup.
	    conn = sqlite3.connect('gematria.db')
	    try:
	        phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
	        logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")

	        matching_phrases = search_gematria_in_db(phrase_gematria)
	        if not matching_phrases:
	            return "No matching phrases found."

	        # Sort results by book, chapter, and verse
	        sorted_phrases = sorted(matching_phrases, key=lambda x: (x[1], x[2], x[3]))

	        # Group results by book
	        results_by_book = defaultdict(list)
	        for words, book, chapter, verse in sorted_phrases:
	            results_by_book[book].append((words, chapter, verse))

	        # Format results for display. Text coming from the database or the
	        # translation service is escaped so it cannot inject markup.
	        results = ["<div class='results-container'>"]
	        for book, phrases in results_by_book.items():
	            results.append(f"<h4>Book: {escape(str(book))}</h4>")
	            for words, chapter, verse in phrases:
	                translation = get_translation(words)
	                link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
	                results.append(f"""
	                <div class='result-item'>
	                <p>Chapter: {chapter}, Verse: {verse}</p>
	                <p class='hebrew-phrase'>Hebrew Phrase: {escape(str(words))}</p>
	                <p>Translation: {escape(str(translation))}</p>
	                <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
	                </div>
	                """)
	        results.append("</div>")  # Close results-container div
	    finally:
	        conn.close()

	    # Add CSS styling
	    style = """
	    <style>
	    .results-container {
	        display: grid;
	        grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
	        gap: 20px;
	    }

	    .result-item {
	        border: 1px solid #ccc;
	        padding: 15px;
	        border-radius: 5px;
	        box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
	    }

	    .hebrew-phrase {
	        font-family: 'SBL Hebrew', 'Ezra SIL', serif;
	        direction: rtl;
	    }

	    .bible-link {
	        display: block;
	        margin-top: 10px;
	        color: #007bff;
	        text-decoration: none;
	    }
	    </style>
	    """

	    return style + "\n".join(results)

	def flatten_text(text):
	    """Join an arbitrarily nested list of strings into one string.

	    Args:
	        text: A string, or a list whose items are strings or further lists.

	    Returns:
	        A single string. List items are flattened recursively and joined
	        with one space each. ``None`` counts as an empty string and any
	        other non-string leaf is converted with ``str()``, so malformed
	        source data cannot crash the join.
	    """
	    if isinstance(text, list):
	        return " ".join(flatten_text(item) for item in text)
	    if text is None:
	        return ""
	    return text if isinstance(text, str) else str(text)

	def run_app():
	    """Prepare the database and translator, then serve the Gradio UI."""
	    initialize_database()
	    initialize_translator()

	    # Load books 1 through 39 as single-word phrases before serving; adjust
	    # the range as needed. To load one book only, pass the same id twice,
	    # e.g. populate_database(27, 27, max_phrase_length=1) for book 27
	    # (Psalms, per the original note).
	    populate_database(1, 39, max_phrase_length=1)

	    phrase_box = gr.Textbox(label="Enter phrase")
	    results_pane = gr.HTML(label="Results")
	    app = gr.Interface(
	        fn=gematria_search_interface,
	        inputs=phrase_box,
	        outputs=results_pane,
	        title="Gematria Search in Tanach",
	        description="Search for phrases in the Tanach that have the same Gematria value.",
	        live=False,
	        allow_flagging="never",
	    )
	    app.launch()

	# Start the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    run_app()