File size: 6,201 Bytes
1032a12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# translation_utils.py

import logging
import sqlite3
from concurrent.futures import ThreadPoolExecutor
import functools
from deep_translator import GoogleTranslator, exceptions

# Constants
# SQLite file used as the persistent translation store. It is a bare relative
# filename, so it is resolved against the process's current working directory.
TRANSLATION_DATABASE_FILE = 'translation_database.db'
# Language codes accepted as translation targets. create_translation_table()
# adds one column per code (with '-' replaced by '_', and "is" stored as
# "is_"), and get_translation() rejects any target not in this set.
SUPPORTED_LANGUAGES = {"af", "sq", "am", "ar", "hy", "az", "eu", "be", "bn", "bs", "bg", "ca", "ceb", "ny", "zh-CN", "zh-TW", "co", "hr", "cs", "da", "nl", "en", "eo", "et", "tl", "fi", "fr", "fy", "gl", "ka", "de", "el", "gu", "ht", "ha", "haw", "iw", "hi", "hmn", "hu", "is", "ig", "id", "ga", "it", "ja", "jw", "kn", "kk", "km", "ko", "ku", "ky", "lo", "la", "lv", "lt", "lb", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mn", "my", "ne", "no", "ps", "fa", "pl", "pt", "pa", "ro", "ru", "sm", "gd", "sr", "st", "sn", "sd", "si", "sk", "sl", "so", "es", "su", "sw", "sv", "tg", "ta", "te", "th", "tr", "uk", "ur", "uz", "vi", "cy", "xh", "yi", "yo", "zu"}

# Initialize logger
# NOTE: basicConfig() configures the *root* logger as an import-time side
# effect of this module (INFO level, timestamped format).
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)


def create_translation_table():
    """Create the ``translations`` table and one TEXT column per language.

    The table is keyed by ``phrase``. For every code in
    ``SUPPORTED_LANGUAGES`` a column is added if it is missing; the column
    name is the code with ``-`` replaced by ``_`` and ``is`` stored as
    ``is_``. Columns are added in sorted order so the resulting schema is
    the same on every run (set iteration order is not stable).

    Errors are logged rather than raised: a failure to add one column is
    logged and the remaining columns are still attempted; any other failure
    is logged and the function returns normally.

    Returns:
        None
    """
    conn = None
    try:
        conn = sqlite3.connect(TRANSLATION_DATABASE_FILE)
        # ``with conn`` only commits/rolls back the transaction; it does not
        # close the connection, hence the explicit close() in ``finally``.
        with conn:
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS translations (
                    phrase TEXT PRIMARY KEY
                )
            ''')

            # Dynamically add language columns.
            # PRAGMA table_info yields one row per column; index 1 is its name.
            cursor.execute("PRAGMA table_info(translations)")
            existing_columns = {col[1] for col in cursor.fetchall()}
            for lang_code in sorted(SUPPORTED_LANGUAGES):
                column_name = lang_code.replace('-', '_')
                if column_name == "is":  # Handle reserved keywords in SQLite
                    column_name = "is_"
                if column_name in existing_columns:
                    continue
                try:
                    # column_name derives from the SUPPORTED_LANGUAGES
                    # constant, never from caller input, so interpolating it
                    # into the statement is safe.
                    cursor.execute(f"ALTER TABLE translations ADD COLUMN `{column_name}` TEXT")
                    logger.info(f"Added column '{column_name}' to translations table.")
                except sqlite3.OperationalError as e:
                    if "duplicate column name" in str(e).lower():
                        logger.debug(f"Column '{column_name}' already exists. Skipping.")
                    else:
                        logger.error(f"Error adding column '{column_name}': {e}")  # More specific error
    except Exception as e:  # Best-effort setup: log and continue rather than crash the importer
        logger.error(f"An unexpected error occurred in create_translation_table: {e}")
    finally:
        if conn is not None:
            conn.close()


@functools.lru_cache(maxsize=1000)
def _translate_or_raise(text, target_language, source_language):
    """Translate *text* with Google Translate, letting every error propagate.

    Because lru_cache does not store a result when the wrapped call raises,
    only successful translations end up in the cache.
    """
    translator = GoogleTranslator(source=source_language, target=target_language)
    return translator.translate(text)


def translate_cached(text, target_language, source_language="auto"):  # Renamed to avoid conflicts
    """Translate text using Google Translate with in-memory caching.

    Successful translations are memoised (up to 1000 entries). Failures are
    NOT cached, so a transient error does not keep returning ``None`` for
    the same arguments on later calls.

    Args:
        text: Text to translate. Falsy values short-circuit to ``""``.
        target_language: Language code to translate into.
        source_language: Language code of *text*; defaults to ``"auto"``.

    Returns:
        The translated string, ``""`` when *text* is falsy, or ``None`` when
        the translation failed (the failure is logged).
    """
    if not text:
        return ""

    try:
        return _translate_or_raise(text, target_language, source_language)
    except exceptions.TranslationNotFound:
        logger.error(f"Translation not found for: {text}")
    except Exception as e:  # Catch generic exceptions
        logger.exception(f"Translation error: {e}")  # Log with traceback
    return None


# Keep the lru_cache introspection helpers available under the public name,
# as they were when the decorator was applied to translate_cached directly.
translate_cached.cache_info = _translate_or_raise.cache_info
translate_cached.cache_clear = _translate_or_raise.cache_clear


def get_translation(phrase, target_language, source_language="auto"):
    """Return a translation from the database, translating and storing on a miss.

    The ``translations`` table is checked first. If no non-empty value is
    stored for *phrase* in the target-language column, the phrase is
    translated with ``translate_cached`` and the result is upserted.

    Args:
        phrase: Text to translate; also the primary key in the table.
        target_language: Target code; must be in ``SUPPORTED_LANGUAGES``.
        source_language: Source code passed to the translator
            (default ``"auto"``).

    Returns:
        A ``(translation, success)`` tuple: ``(text, True)`` on a database
        hit or a successful translation, ``(None, False)`` for an
        unsupported language, a failed translation, or any database or
        unexpected error (which is logged, not raised).
    """
    if target_language not in SUPPORTED_LANGUAGES:
        logger.error(f"Unsupported target language: {target_language}")
        return None, False  # Return None and False for failure

    # Same code -> column mapping that create_translation_table() uses.
    # The value is derived from a validated SUPPORTED_LANGUAGES member, so it
    # is safe to interpolate into the SQL text below.
    column_name = target_language.replace('-', '_')
    if column_name == "is":
        column_name = "is_"

    conn = None
    try:
        conn = sqlite3.connect(TRANSLATION_DATABASE_FILE)
        # ``with conn`` commits on success and rolls back on error, but does
        # not close the connection; that happens in ``finally``.
        with conn:
            cursor = conn.cursor()
            cursor.execute(f"SELECT `{column_name}` FROM translations WHERE phrase=?", (phrase,))
            row = cursor.fetchone()

            # A row may exist for the phrase with this language still NULL.
            if row and row[0]:
                return row[0], True

            translated_text = translate_cached(phrase, target_language, source_language)
            if not translated_text:
                return None, False  # Explicitly return False when translation fails

            # Upsert: create the row, or fill in just this language's column.
            cursor.execute(f"""
                INSERT INTO translations (phrase, `{column_name}`) VALUES (?, ?)
                ON CONFLICT(phrase) DO UPDATE SET `{column_name}`=excluded.`{column_name}`
            """, (phrase, translated_text))
        return translated_text, True
    except sqlite3.Error as e:
        logger.error(f"Database error: {e}")
        return None, False  # Return explicit failure indicator
    except Exception as e:
        logger.exception(f"Unexpected error in get_translation: {e}")  # Generic Exception Catch-All
        return None, False  # Return explicit failure indicator
    finally:
        if conn is not None:
            conn.close()


def batch_translate(phrases, target_language, source_languages):  # Takes a list of source languages
    """Translate multiple phrases concurrently, respecting source language.

    Each distinct non-empty phrase is passed to ``get_translation`` exactly
    once on a thread pool, so duplicates do not race on the same network
    call and database row. Falsy phrases are not translated.

    Args:
        phrases: Iterable of phrases (hashable; used as dictionary keys).
        target_language: Target language code for every phrase.
        source_languages: Sequence parallel to *phrases* giving each
            phrase's source language, or a single string applied to all
            phrases. If a phrase occurs more than once, the source language
            of its last occurrence is used.

    Returns:
        A dict mapping each phrase to its translation, or to ``None`` when
        the phrase is falsy or its translation failed.

    Raises:
        IndexError: If *source_languages* is a sequence shorter than needed
            for a non-empty phrase.
    """
    phrases = list(phrases)  # allow one-shot iterables; we iterate twice
    if isinstance(source_languages, str):
        # A bare string would otherwise be indexed character by character.
        source_languages = [source_languages] * len(phrases)

    # Distinct non-empty phrases -> source language (last occurrence wins).
    pending = {
        phrase: source_languages[i]
        for i, phrase in enumerate(phrases)
        if phrase
    }

    translated = {}
    if pending:
        with ThreadPoolExecutor() as executor:
            futures = {
                phrase: executor.submit(get_translation, phrase, target_language, source)
                for phrase, source in pending.items()
            }
            # get_translation returns (translation, success_flag); keep the text.
            translated = {phrase: future.result()[0] for phrase, future in futures.items()}

    # Falsy phrases were never submitted, so .get() maps them to None.
    return {phrase: translated.get(phrase) for phrase in phrases}