Spaces:
Running
Running
File size: 5,813 Bytes
3b58069 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# translation_utils.py
import logging
import sqlite3
from concurrent.futures import ThreadPoolExecutor
import functools
from deep_translator import GoogleTranslator, ReversoTranslator, exceptions
# Constants
TRANSLATION_DATABASE_FILE = 'translation_database.db'
SUPPORTED_LANGUAGES = {"af", "sq", "am", "ar", "hy", "az", "eu", "be", "bn", "bs", "bg", "ca", "ceb", "ny", "zh-CN", "zh-TW", "co", "hr", "cs", "da", "nl", "en", "eo", "et", "tl", "fi", "fr", "fy", "gl", "ka", "de", "el", "gu", "ht", "ha", "haw", "iw", "hi", "hmn", "hu", "is", "ig", "id", "ga", "it", "ja", "jw", "kn", "kk", "km", "ko", "ku", "ky", "lo", "la", "lv", "lt", "lb", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mn", "my", "ne", "no", "ps", "fa", "pl", "pt", "pa", "ro", "ru", "sm", "gd", "sr", "st", "sn", "sd", "si", "sk", "sl", "so", "es", "su", "sw", "sv", "tg", "ta", "te", "th", "tr", "uk", "ur", "uz", "vi", "cy", "xh", "yi", "yo", "zu"}
# Initialize logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def create_translation_table():
"""Creates the translation table if it doesn't exist."""
try:
with sqlite3.connect(TRANSLATION_DATABASE_FILE) as conn:
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS translations (
phrase TEXT PRIMARY KEY
)
''')
# Dynamically add language columns
cursor.execute("PRAGMA table_info(translations)")
existing_columns = {col[1] for col in cursor.fetchall()}
for lang_code in SUPPORTED_LANGUAGES:
column_name = lang_code.replace('-', '_')
if column_name == "is": # Handle reserved keywords in SQLite
column_name = "is_"
if column_name not in existing_columns:
try:
cursor.execute(f"ALTER TABLE translations ADD COLUMN `{column_name}` TEXT")
logger.info(f"Added column '{column_name}' to translations table.")
except sqlite3.OperationalError as e:
if "duplicate column name" in str(e).lower():
logger.debug(f"Column '{column_name}' already exists. Skipping.")
else:
logger.error(f"Error adding column '{column_name}': {e}") # More specific error
conn.commit()
except Exception as e: # Broad exception handling to catch any other potential issues
logger.error(f"An unexpected error occurred in create_translation_table: {e}")
@functools.lru_cache(maxsize=1000)
def translate_cached(text, target_language, source_language="auto"):
"""Translates text using Google Translate or Reverso, with caching."""
if not text:
return ""
try:
#if source_language == "he" or source_language == "ar":
# translator = ReversoTranslator(source=source_language, target=target_language)
#else:
# translator = GoogleTranslator(source=source_language, target=target_language)
if source_language=="he":
source_language="iw"
translator = GoogleTranslator(source=source_language, target=target_language)
translated = translator.translate(text)
return translated
except exceptions.TranslationNotFound:
logger.error(f"Translation not found for: {text}")
except Exception as e:
logger.exception(f"Translation error: {e}")
return "" # Return empty string on error
def get_translation(phrase, target_language, source_language="auto"):
"""Retrieves a translation from the database or translates and stores it."""
if target_language not in SUPPORTED_LANGUAGES:
logger.error(f"Unsupported target language: {target_language}")
return None, False # Return None and False for failure
try:
with sqlite3.connect(TRANSLATION_DATABASE_FILE) as conn:
cursor = conn.cursor()
column_name = target_language.replace('-', '_')
if column_name == "is":
column_name = "is_"
cursor.execute(f"SELECT `{column_name}` FROM translations WHERE phrase=?", (phrase,))
result = cursor.fetchone()
if result and result[0]:
return result[0], True
translated_text = translate_cached(phrase, target_language, source_language)
if translated_text:
cursor.execute(f"""
INSERT INTO translations (phrase, `{column_name}`) VALUES (?, ?)
ON CONFLICT(phrase) DO UPDATE SET `{column_name}`=excluded.`{column_name}`
""", (phrase, translated_text))
conn.commit()
return translated_text, True
else:
return None, False
except sqlite3.Error as e:
logger.error(f"Database error: {e}")
return None, False
except Exception as e:
logger.exception(f"Unexpected error in get_translation: {e}")
return None, False
def batch_translate(phrases, target_language, source_languages):
"""Translates multiple phrases concurrently, respecting source language."""
with ThreadPoolExecutor() as executor:
futures = [executor.submit(get_translation, phrase, target_language, source_languages[i])
for i, phrase in enumerate(phrases) if phrase]
results = [future.result() for future in futures]
translations = {}
j=0
for i, phrase in enumerate(phrases):
if phrase:
translations[phrase] = results[j][0]
j+=1
else:
translations[phrase] = ""
return translations
|