Spaces:

neuralworm
/

daily_psalm

Running

File size: 15,082 Bytes

import json
import os
import logging
import sqlite3
import re
from typing import Dict, List, Any
from gematria import calculate_gematria, strip_diacritics
from deep_translator import GoogleTranslator

logger = logging.getLogger(__name__)

def process_bible_files(start: int, end: int) -> Dict[int, Dict[str, Any]]:
    """
    Loads a contiguous range of Bible book JSON files into memory.

    Each book is read from ``texts/bible/<book id>.json``. A file that is
    missing, is not valid JSON, or fails in any other way is reported with a
    warning and left out of the result; a file whose parsed content is falsy
    (for example an empty object) is left out silently.

    Args:
        start: The first book ID to load (inclusive).
        end: The last book ID to load (inclusive).

    Returns:
        A dictionary keyed by book ID. Every value is a dictionary with a
        'title' entry (falling back to "No title") and a 'text' entry
        (falling back to an empty list).
    """
    base_path = "texts/bible"
    books = {}

    for book_id in range(start, end + 1):
        file_name = f"{base_path}/{book_id}.json"
        try:
            with open(file_name, 'r', encoding='utf-8') as handle:
                payload = json.load(handle)

            # Nothing to record for an empty payload.
            if not payload:
                continue

            # The lookups stay inside the try so that a payload without
            # .get() (e.g. a JSON array) is reported by the generic handler.
            books[book_id] = {
                "title": payload.get("title", "No title"),
                "text": payload.get("text", []),
            }
        except FileNotFoundError:
            logger.warning(f"File {file_name} not found.")
        except json.JSONDecodeError as e:
            logger.warning(f"File {file_name} could not be read as JSON: {e}")
        except Exception as e:
            logger.warning(f"Error processing {file_name}: {e}")

    return books

def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True, strip_in_braces=True,
                       strip_diacritics_value=True, translate=False):
    """
    Runs an ELS (Equidistant Letter Sequence) extraction over a range of Bible books.

    For every loaded book the verses are concatenated, lower-cased and
    optionally cleaned; then, for each non-zero round, every
    ``abs(round * step)``-th character is collected, starting at the first
    character for positive rounds and at the last character (walking
    backwards) for negative rounds.

    Parameters:
    - start (int): Start number of the Bible book.
    - end (int): End number of the Bible book.
    - step (int): Base step size; it is multiplied by each round number.
    - rounds (str): Comma-separated list of round numbers (can include negative values; 0 is skipped).
    - length (int): Maximum length of the result text (values <= 0 disable truncation).
    - tlang (str): Target language for translation.
    - strip_spaces (bool): Whether to remove spaces from the text.
    - strip_in_braces (bool): Whether to remove text within [...] or (...).
    - strip_diacritics_value (bool): Whether to remove diacritics from the text.
    - translate (bool): Whether to translate the result text (only attempted when tlang is not "en").

    Returns:
    - list | None: A list of dictionaries, each holding either one ELS result
      or an "error" message. None is returned when nothing was collected
      (for instance when every round is 0).
    """
    logger.debug(f"Processing Bible files {start}-{end} with step {step}, rounds {rounds}")
    results = []

    try:
        bible_data = process_bible_files(start, end)
        if not bible_data:
            return [{"error": f"No Bible data found for books {start}-{end}"}]

        rounds_list = [int(token.strip()) for token in rounds.split(",")]

        for book_id, book_info in bible_data.items():
            book_title = book_info.get("title", "Unknown")
            chapters = book_info.get("text", [])

            if not chapters:
                results.append({"error": f"No text found for book {book_title} (ID: {book_id})"})
                continue

            # One long string: every non-empty verse followed by a space.
            flattened_text = "".join(
                verse + " "
                for chapter in chapters
                for verse in chapter
                if verse
            )

            # Normalisation steps, each controlled by its own flag.
            text = flattened_text.lower()
            if strip_in_braces:
                text = re.sub(r'\[.*?\]|\(.*?\)', '', text)
            if strip_diacritics_value:
                text = strip_diacritics(text)
            if strip_spaces:
                text = text.replace(" ", "")

            text_len = len(text)

            for round_num in rounds_list:
                if round_num == 0:
                    continue

                stride = abs(round_num * step)
                if round_num > 0:
                    # Forward ELS: first character onwards.
                    positions = range(0, text_len, stride)
                else:
                    # Backward ELS: last character towards the front.
                    positions = range(text_len - 1, -1, -stride)

                result_text = "".join(text[pos] for pos in positions)

                # Cap the result only when a positive limit was requested.
                if length > 0 and len(result_text) > length:
                    result_text = result_text[:length]

                translated_text = ""
                wants_translation = result_text and translate and tlang != "en"
                if wants_translation:
                    try:
                        translator = GoogleTranslator(source='auto', target=tlang)
                        translated_text = translator.translate(result_text)
                    except Exception as e:
                        # A failed translation is reported in-band; the ELS result is still returned.
                        logger.warning(f"Translation error: {e}")
                        translated_text = f"Translation error: {str(e)}"

                entry = {
                    "book_id": book_id,
                    "book_title": book_title,
                    "step": step,
                    "round": round_num,
                    "result_text": result_text,
                    "translated_text": translated_text,
                    "gematria": calculate_gematria(result_text)
                }
                results.append(entry)

    except Exception as e:
        # Any failure ends processing; results gathered so far are kept and an error entry is added.
        logger.error(f"Error processing Bible files: {e}", exc_info=True)
        results.append({"error": f"Error processing Bible files: {str(e)}"})

    if not results:
        return None
    return results

# This function is not needed anymore as we're using get_first_els_result_matthew from app.py
# Keeping the definition for compatibility but marking it as deprecated
def get_first_els_result_john(gematria_sum, tlang="en"):
    """
    DEPRECATED: Use get_first_els_result_matthew instead.

    Logs a deprecation warning and forwards the call unchanged to
    ``get_first_els_result_matthew`` from ``app``.

    Args:
        gematria_sum: Forwarded as the first positional argument.
        tlang: Forwarded as the second positional argument (default "en").

    Returns:
        Whatever ``get_first_els_result_matthew(gematria_sum, tlang)`` returns.
    """
    logger.warning("get_first_els_result_john is deprecated, use get_first_els_result_matthew instead")
    # Imported at call time rather than at module load.
    # NOTE(review): presumably this avoids a circular import with app.py - confirm.
    # Only the function that is actually called is imported, so this shim does
    # not break if other, unrelated names in app.py are renamed or removed.
    from app import get_first_els_result_matthew

    return get_first_els_result_matthew(gematria_sum, tlang)

def create_bible_display_iframe(book_title, book_id, chapter=None, verse=None):
    """
    Builds the HTML for an 800x600 iframe showing a BibleGateway passage (CJB version).

    Args:
        book_title: Book name; URL-encoded with quote_plus before use.
        book_id: Only used in the debug log line.
        chapter: Optional chapter, appended to the search as "+<chapter>".
        verse: Optional verse, appended as ":<verse>"; ignored when chapter is None.

    Returns:
        The iframe markup as a string.
    """
    from urllib.parse import quote_plus

    logger.debug(f"Creating Bible iframe for {book_title}, book_id: {book_id}, chapter: {chapter}, verse: {verse}")

    search_book = quote_plus(book_title)

    # The passage reference is inserted as-is (not URL-encoded).
    if chapter is None:
        reference = ""
    elif verse is None:
        reference = f"+{chapter}"
    else:
        reference = f"+{chapter}" + f":{verse}"

    url = f"https://www.biblegateway.com/passage/?search={search_book}{reference}&version=CJB"
    return f'<iframe src="{url}" width="800" height="600"></iframe>'

def initialize_bible_database(db_file: str = 'bible.db', max_phrase_length: int = 1,
                              book_start: int = 40, book_end: int = 66):
    """
    Initializes the Bible database with phrase texts and their gematria values.

    Creates the ``results`` and ``processed_books`` tables (plus an index on
    ``gematria_sum``) if they do not exist. Then, for every book ID in
    ``book_start``..``book_end``, each run of 1..``max_phrase_length``
    consecutive words inside a verse is stored with its gematria sum, its
    book/chapter/verse location and a ``word_position`` range.

    ``word_position`` is a 1-based range counted continuously across all books
    loaded in this call, starting at ``book_start``. Books that are skipped
    because they were already processed still advance the counter, so the
    positions of later books do not depend on which earlier books were
    skipped (for example when resuming after an interrupted run).

    Each book is written in a single transaction: if the batch insert fails
    the book's rows are rolled back and the book is not marked as processed.

    Args:
        db_file: The SQLite database file to use
        max_phrase_length: Maximum phrase length (in words) to process
        book_start: First book ID to process (inclusive). Defaults to 40.
        book_end: Last book ID to process (inclusive). Defaults to 66.

    Returns:
        A dictionary mapping book ID to book title for every book whose data
        could be loaded, including books skipped as already processed.
    """
    from contextlib import closing
    from gematria import calculate_gematria, strip_diacritics
    from tqdm import tqdm  # Import tqdm for progress bars

    logger.info(f"Initializing Bible database: {db_file}")

    # Running word counter across books, used for word position tracking
    total_word_count = 0
    book_names = {}

    # sqlite3's own context manager only commits/rolls back; closing() makes
    # sure the connection itself is released when we are done.
    with closing(sqlite3.connect(db_file)) as conn:
        cursor = conn.cursor()

        # Create results table
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            phrase_length INTEGER,
            word_position TEXT,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
        )
        ''')

        cursor.execute('''
        CREATE INDEX IF NOT EXISTS idx_results_gematria
        ON results (gematria_sum)
        ''')

        # Create processed_books table to track processing
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
        ''')

        conn.commit()

        logger.info(f"Processing Bible books {book_start}-{book_end}")

        for book_id in tqdm(range(book_start, book_end + 1), desc="Processing Bible Books"):
            # Load book data
            book_data = process_bible_files(book_id, book_id)
            if book_id not in book_data:
                logger.warning(f"No data found for book ID {book_id}")
                continue

            book_info = book_data[book_id]
            book_title = book_info['title']
            chapters = book_info['text']
            book_names[book_id] = book_title

            # Check if this book has already been processed
            cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_title,))
            row = cursor.fetchone()
            if row and row[0] is not None and row[0] >= max_phrase_length:
                logger.info(f"Skipping book {book_title}: Already processed with max_phrase_length {row[0]}")
                # Still account for this book's words so that the positions
                # assigned to the following books stay the same on every run.
                total_word_count += sum(
                    len(verse_text.split())
                    for chapter in chapters
                    for verse_text in chapter
                    if verse_text
                )
                continue

            phrases_to_insert = []

            for chapter_idx, chapter in enumerate(chapters, 1):
                for verse_idx, verse_text in enumerate(chapter, 1):
                    if not verse_text:
                        continue

                    # Split verse into words
                    words = verse_text.split()

                    # Process phrases of different lengths
                    for length in range(1, max_phrase_length + 1):
                        for start in range(len(words) - length + 1):
                            phrase = " ".join(words[start:start + length])
                            cleaned_phrase = strip_diacritics(phrase)
                            gematria_sum = calculate_gematria(cleaned_phrase.replace(" ", ""))

                            # 1-based positions of the first and last word of the phrase
                            first_word = total_word_count + start + 1
                            last_word = total_word_count + start + length
                            word_position_range = f"{first_word}-{last_word}"

                            phrases_to_insert.append(
                                (gematria_sum, cleaned_phrase, "", book_title, chapter_idx, verse_idx, length,
                                 word_position_range)
                            )

                    # Update total word count after processing each verse
                    total_word_count += len(words)

            # Nothing to store (and nothing to mark as processed) for a book without phrases
            if not phrases_to_insert:
                continue

            try:
                cursor.executemany('''
                INSERT OR REPLACE INTO results
                (gematria_sum, words, translation, book, chapter, verse, phrase_length, word_position)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', phrases_to_insert)

                # Update the processed_books table
                cursor.execute('''
                INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
                VALUES (?, ?)
                ''', (book_title, max_phrase_length))

                conn.commit()
                logger.info(f"Processed book {book_title}: inserted {len(phrases_to_insert)} phrases")
            except sqlite3.Error as e:
                # Drop the partially written batch so it is not committed
                # together with the next book.
                conn.rollback()
                logger.error(f"Database error processing {book_title}: {e}")

    logger.info(f"Bible database initialization completed. Processed {len(book_names)} books.")
    return book_names

def find_shortest_bible_match(gematria_sum: int, db_file: str = 'bible.db',
                              book: str = 'Revelation') -> Dict[str, Any]:
    """
    Finds the shortest stored phrase of one Bible book that matches the given gematria sum.

    The ``results`` table is searched for rows of ``book`` with the requested
    ``gematria_sum``; the row whose ``words`` text is shortest (by character
    length) is returned. Database and other errors are logged and reported as
    "no match" rather than raised.

    Args:
        gematria_sum: The gematria sum to match
        db_file: The SQLite database file to search in
        book: Book title to restrict the search to, as stored in the
            ``book`` column. Defaults to 'Revelation'.

    Returns:
        A dictionary with the keys 'words', 'book', 'chapter', 'verse',
        'phrase_length' and 'word_position', or None if no match is found
        or an error occurred
    """
    from contextlib import closing

    logger.debug(f"Finding shortest Bible match for gematria sum: {gematria_sum} in {db_file}")

    try:
        # closing() releases the connection; sqlite3's own context manager
        # only manages the transaction and would leave it open.
        with closing(sqlite3.connect(db_file)) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT words, book, chapter, verse, phrase_length, word_position
                FROM results
                WHERE gematria_sum = ? AND book = ?
                ORDER BY LENGTH(words) ASC
                LIMIT 1
            ''', (gematria_sum, book))

            result = cursor.fetchone()

        if not result:
            logger.debug(f"No matching verse found in {book} for gematria sum: {gematria_sum}")
            return None

        logger.debug(f"Found Bible match: {result}")
        # The SELECT always yields exactly these six columns, in this order.
        words, matched_book, chapter, verse, phrase_length, word_position = result
        return {
            "words": words,
            "book": matched_book,
            "chapter": chapter,
            "verse": verse,
            "phrase_length": phrase_length,
            "word_position": word_position
        }

    except sqlite3.Error as e:
        logger.error(f"Database error when finding Bible match: {e}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error when finding Bible match: {e}")
        return None