"""Fill in missing English translations for phrases stored in a SQLite table.

The script reads every distinct ``words`` value from the ``results`` table
whose ``translation`` column is NULL, translates each one with Google
Translate (via ``deep_translator``) on a pool of worker threads, and writes
the result back with an UPDATE.
"""
import logging
import sqlite3
import threading
import time
from contextlib import closing
from queue import Queue
from typing import Optional

from deep_translator import GoogleTranslator, exceptions
from tqdm import tqdm

# Constants
DATABASE_FILE = 'gematria.db'  # Use your actual database file name
BATCH_SIZE = 1000  # NOTE(review): currently unused by this script
NUM_THREADS = 10  # Number of parallel translation threads
RETRY_BACKOFF_SECONDS = 0.5  # Base delay between translation retries

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize the translator
# NOTE(review): the docstrings in this file talk about Hebrew, but the source
# language code configured here is 'yi' — confirm which one is intended.
translator = GoogleTranslator(source='yi', target='en')
logging.info("Translator initialized.")

# Work queue shared by the producer and the worker threads, plus two progress
# bars: one for filling the queue and one for stored translations.
translation_queue = Queue()
translation_queue_tqdm = tqdm(total=0, dynamic_ncols=True, desc="Translation Queue")
total_translations_tqdm = tqdm(total=0, dynamic_ncols=True, desc="Total Translations")

# Lock serializing database writes and the shared counter below
db_lock = threading.Lock()
translations_completed = 0  # Counter for completed translations


def translate_and_store(phrase: str) -> Optional[str]:
    """Translate a single phrase to English using the module-level translator.

    Despite its name this function does not touch the database; the caller
    is responsible for storing the result.

    Args:
        phrase: The text to translate.

    Returns:
        The translated text, or ``None`` when every attempt failed with one
        of the handled ``deep_translator`` exceptions.
    """
    max_retries = 3
    for attempt in range(1, max_retries + 1):
        try:
            return translator.translate(phrase)
        except (exceptions.TranslationNotFound,
                exceptions.NotValidPayload,
                exceptions.ServerException,
                exceptions.RequestError) as e:
            logging.warning(
                "Error translating phrase '%s': %s. Retrying... (%d/%d)",
                phrase, e, attempt, max_retries,
            )
            if attempt < max_retries:
                # Back off briefly so the retry does not hit the service
                # again immediately.
                time.sleep(RETRY_BACKOFF_SECONDS * 2 ** (attempt - 1))

    logging.error("Failed to translate phrase '%s' after %d retries.", phrase, max_retries)
    return None


def translation_worker():
    """Worker thread: translate queued phrases and store the results.

    Runs until it receives ``None`` from ``translation_queue``. Every item
    taken from the queue is acknowledged with ``task_done()`` even when
    processing fails, so ``translation_queue.join()`` cannot hang on an
    error.
    """
    global translations_completed

    # One connection per worker, created in the worker's own thread and
    # closed when the worker stops. ``closing`` is required because the
    # sqlite3 connection context manager only commits or rolls back.
    with closing(sqlite3.connect(DATABASE_FILE)) as connection:
        while True:
            phrase = translation_queue.get()
            try:
                if phrase is None:  # Sentinel value to stop the thread
                    break

                translation = translate_and_store(phrase)
                if translation is None:
                    # Translation failed; leave the row untranslated.
                    continue

                with db_lock:
                    with connection:  # commits on success, rolls back on error
                        connection.execute(
                            "UPDATE results SET translation = ? WHERE words = ?",
                            (translation, phrase),
                        )
                    translations_completed += 1
                    total_translations_tqdm.update()
            except Exception:
                # Thread boundary: log and keep the worker alive so the
                # remaining queue items still get processed.
                logging.exception("Unexpected error while processing phrase '%s'", phrase)
            finally:
                translation_queue.task_done()


def populate_translations():
    """Translate every distinct phrase in the database that lacks a translation.

    Loads the phrases into ``translation_queue``, starts ``NUM_THREADS``
    workers, waits for the queue to drain, then stops the workers.
    """
    with closing(sqlite3.connect(DATABASE_FILE)) as connection:
        # Get the total count of distinct phrases needing translation
        total_phrases = connection.execute(
            "SELECT COUNT(DISTINCT words) FROM results WHERE translation IS NULL"
        ).fetchone()[0]
        logging.info("Found %d distinct phrases to translate.", total_phrases)

        # Set the total for both progress bars
        translation_queue_tqdm.total = total_phrases
        total_translations_tqdm.total = total_phrases
        translation_queue_tqdm.refresh()
        total_translations_tqdm.refresh()

        # Build the translation queue first, streaming rows from the cursor
        rows = connection.execute(
            "SELECT DISTINCT words FROM results WHERE translation IS NULL"
        )
        for (phrase,) in rows:
            if phrase is None:
                # A NULL phrase would be mistaken for the stop sentinel by
                # the workers, and COUNT(DISTINCT ...) does not count it.
                continue
            translation_queue.put(phrase)
            translation_queue_tqdm.update()

    # Close the queue progress bar once the queue is fully populated
    translation_queue_tqdm.close()

    # Start worker threads AFTER the queue is built
    threads = [threading.Thread(target=translation_worker) for _ in range(NUM_THREADS)]
    for thread in threads:
        thread.start()

    # Wait for all tasks to be completed
    translation_queue.join()

    # Stop worker threads: one sentinel per worker
    for _ in threads:
        translation_queue.put(None)
    for thread in threads:
        thread.join()

    total_translations_tqdm.close()
    logging.info("All translations completed.")


def save_translations_periodically(stop_event=None):
    """Commit on a fresh database connection once a minute.

    The workers already commit each update themselves, so this commit has
    nothing pending of its own.

    Args:
        stop_event: Optional ``threading.Event``. When it is set the loop
            exits; when omitted the loop runs until the process ends.
    """
    if stop_event is None:
        stop_event = threading.Event()

    # ``wait`` returns True as soon as the event is set, False on timeout.
    while not stop_event.wait(60):
        logging.info("Saving translations to the database...")
        with db_lock:  # Acquire the lock before saving
            with closing(sqlite3.connect(DATABASE_FILE)) as connection:
                connection.commit()
        logging.info("Translations saved.")


def main():
    """Run the translation job and exit once it has finished."""
    stop_saving = threading.Event()

    # Start the translation process in a separate thread
    translation_thread = threading.Thread(target=populate_translations)
    translation_thread.start()

    # Start the periodic saving thread; daemon so it can never keep the
    # process alive on its own.
    save_thread = threading.Thread(
        target=save_translations_periodically,
        args=(stop_saving,),
        daemon=True,
    )
    save_thread.start()

    # Wait for the job instead of sleeping forever, then stop the saver.
    translation_thread.join()
    stop_saving.set()
    save_thread.join()


if __name__ == "__main__":
    main()