File size: 14,651 Bytes
14f9743
 
 
 
e473db8
908093b
237c2ec
908093b
 
 
 
 
c2616df
 
 
14f9743
a73c69c
e6670d8
908093b
 
 
 
14f9743
e473db8
 
 
237c2ec
e473db8
 
908093b
 
 
c2616df
 
908093b
 
 
 
c2616df
 
908093b
 
 
c2616df
14f9743
908093b
 
c2616df
e473db8
908093b
 
 
 
c2616df
 
e473db8
908093b
 
 
 
 
 
 
 
c2616df
14f9743
c2616df
679fef8
c2616df
 
908093b
 
 
c2616df
908093b
 
 
 
 
 
 
 
 
 
 
 
e6670d8
c2616df
e473db8
908093b
14f9743
a6139ac
 
d2a4684
 
e6670d8
e473db8
d2a4684
14f9743
de42686
e6670d8
 
 
 
 
 
 
14f9743
 
237c2ec
a6139ac
a73c69c
e473db8
 
 
870cc02
14f9743
a73c69c
e6670d8
237c2ec
e6670d8
 
e473db8
908093b
 
870cc02
908093b
 
e473db8
14f9743
e6670d8
908093b
e473db8
1594016
e6670d8
908093b
237c2ec
908093b
e473db8
908093b
e473db8
908093b
e473db8
908093b
 
 
 
14f9743
e473db8
908093b
 
e6670d8
908093b
 
 
14f9743
908093b
237c2ec
908093b
e6670d8
237c2ec
908093b
e6670d8
e473db8
 
908093b
 
e473db8
398c940
237c2ec
e6670d8
 
237c2ec
 
 
e473db8
e6670d8
 
237c2ec
908093b
 
398c940
908093b
 
e473db8
14f9743
e6670d8
908093b
237c2ec
e6670d8
908093b
e6670d8
 
908093b
 
14f9743
908093b
e473db8
908093b
 
 
 
 
 
 
 
 
 
 
 
14f9743
908093b
 
237c2ec
14f9743
908093b
 
 
 
e473db8
908093b
 
 
 
e6670d8
 
908093b
e6670d8
237c2ec
14f9743
908093b
e6670d8
908093b
 
 
c2616df
237c2ec
908093b
 
 
 
237c2ec
 
e6670d8
237c2ec
 
 
908093b
e6670d8
908093b
e473db8
14f9743
908093b
e473db8
14f9743
908093b
237c2ec
c2616df
 
 
 
e473db8
908093b
237c2ec
e6670d8
c2616df
 
237c2ec
e473db8
e6670d8
237c2ec
c2616df
e6670d8
237c2ec
908093b
e473db8
908093b
237c2ec
 
e473db8
14f9743
908093b
14f9743
908093b
 
14f9743
908093b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237c2ec
908093b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# -*- coding: utf-8 -*-
"""
translation_models.py

Provides language detection and translation functionalities using the
'deep-translator' library (Google Translate backend).

Dependency Handling:
- This module attempts to import 'deep-translator' when loaded.
- If the import fails, a WARNING is logged once, and translation/detection
  functions will return None without further error messages about the missing library.
- Ensure 'deep-translator' is installed in the correct Python environment.

WORKAROUND APPLIED: Removed import/handling of BadSourceLanguage/BadTargetLanguage
due to persistent ImportError on the platform, even when the library version seems correct.
"""

import logging
from typing import Dict, Optional, Union, Type # Added Type for exception hinting

# --- Logging Setup ---
# Module-scoped logger; handler/level configuration is left to the importing application.
logger = logging.getLogger(__name__)

# --- Constants ---
DEFAULT_LANGUAGE_CODE = "en"           # Fallback language code
AUTO_DETECT_INDICATOR = "Auto-Detect"  # Source-language label meaning "let the backend decide"
DETECT_TEXT_SNIPPET_LENGTH = 500       # Maximum characters for language detection
TRANSLATE_WARN_LENGTH = 4800           # Warn if text exceeds this length

# --- Dependency Import and Check ---
# Placeholders that stay in effect unless the optional import below succeeds.
GoogleTranslator = None
DEEP_TRANSLATOR_AVAILABLE = False

# Every library exception name starts out bound to the base Exception type so the
# names always exist at module level, whether or not the library can be imported.
TranslationNotFound: Type[Exception] = Exception
NotValidPayload: Type[Exception] = Exception
NotValidLength: Type[Exception] = Exception
RequestError: Type[Exception] = Exception
TooManyRequests: Type[Exception] = Exception
# WORKAROUND: these two are never imported from the library (doing so raised
# ImportError on the target platform), so they permanently stay as Exception.
BadSourceLanguage: Type[Exception] = Exception
BadTargetLanguage: Type[Exception] = Exception


try:
    # Import under private aliases so that a failure part-way through leaves
    # the public module-level names above untouched (all-or-nothing).
    from deep_translator import GoogleTranslator as _GoogleTranslator
    from deep_translator.exceptions import (
        TranslationNotFound as _TranslationNotFound,
        NotValidPayload as _NotValidPayload,
        NotValidLength as _NotValidLength,
        RequestError as _RequestError,
        TooManyRequests as _TooManyRequests
        # EXCLUDED on purpose: BadSourceLanguage, BadTargetLanguage
    )
except ImportError as import_error:
    # Library (or one of its components) is missing: report once at load time.
    # DEEP_TRANSLATOR_AVAILABLE stays False.
    logger.warning(
        f"Could not import 'deep-translator' library components. Translation features will be disabled. "
        f"Ensure it is installed in the correct environment. Error details: {import_error}"
    )
except Exception as general_error:
    # Anything else raised while importing; keep the traceback for diagnosis.
    # DEEP_TRANSLATOR_AVAILABLE stays False.
    logger.error(
        f"An unexpected error occurred during 'deep-translator' import/setup. "
        f"Translation features may be unstable or disabled. Error: {general_error}",
        exc_info=True
    )
else:
    # All imports succeeded: publish the real classes and flip the flag.
    GoogleTranslator = _GoogleTranslator  # type: ignore
    (
        TranslationNotFound,
        NotValidPayload,
        NotValidLength,
        RequestError,
        TooManyRequests,
    ) = (
        _TranslationNotFound,
        _NotValidPayload,
        _NotValidLength,
        _RequestError,
        _TooManyRequests,
    )
    DEEP_TRANSLATOR_AVAILABLE = True
    logger.info("Successfully imported 'deep-translator' (with workaround for language exceptions). Translation features enabled.")


# --- Language Configuration ---
# Maps the user-friendly language name (convenient as a UI label) to the
# language code handed to the translation backend. Insertion order is the
# order listed here; append further (name, code) pairs as needed.
LANGUAGE_CODES: Dict[str, str] = dict(
    (
        ("English", "en"),
        ("Spanish", "es"),
        ("French", "fr"),
        ("German", "de"),
        ("Italian", "it"),
        ("Portuguese", "pt"),
        ("Japanese", "ja"),
        ("Chinese (Simplified)", "zh-CN"),
        ("Russian", "ru"),
        ("Arabic", "ar"),
        ("Hindi", "hi"),
        ("Korean", "ko"),
        ("Dutch", "nl"),
        ("Swedish", "sv"),
        ("Turkish", "tr"),
        ("Polish", "pl"),
        ("Vietnamese", "vi"),
        ("Thai", "th"),
    )
)

# --- Core Functions ---

def detect_language(text: str) -> Optional[str]:
    """
    Detects the language of the input text using the Google Translate backend.

    Surrounding whitespace is removed before the detection snippet is cut, so
    that leading blank space cannot use up the snippet budget.

    Args:
        text: The text whose language should be detected. Only the first
            DETECT_TEXT_SNIPPET_LENGTH characters (after stripping) are sent.

    Returns:
        The detected language code (e.g., 'en', 'es'), stripped and lowercased,
        if detection succeeds.
        DEFAULT_LANGUAGE_CODE ('en') if the input is empty or whitespace-only,
        if the translator object offers no detection method, if the result is
        unusable, or if any error is raised during detection.
        None if the deep-translator library is unavailable.
    """
    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
        # Library not imported successfully, already logged at startup.
        return None

    stripped_text = text.strip() if text else ""
    if not stripped_text:
        logger.debug("Empty text provided for language detection; returning default lang code.")
        return DEFAULT_LANGUAGE_CODE

    # Use only a snippet for efficiency and API limits. Cutting from the
    # stripped text guarantees the snippet contains real content.
    snippet = stripped_text[:DETECT_TEXT_SNIPPET_LENGTH]

    try:
        # Target doesn't matter much for detection, use default
        translator = GoogleTranslator(source='auto', target=DEFAULT_LANGUAGE_CODE)

        # NOTE(review): it is not confirmed that every deep-translator release
        # exposes detect() on GoogleTranslator - verify against the installed
        # version. Checking explicitly avoids reporting a missing method as an
        # "unexpected" error with a full traceback on every call.
        detect = getattr(translator, "detect", None)
        if not callable(detect):
            logger.warning(
                "The installed 'deep-translator' GoogleTranslator provides no 'detect' method; "
                "using default language code."
            )
            return DEFAULT_LANGUAGE_CODE

        detected_result = detect(snippet)  # Can return list or string

        # Parse the potentially varied detection result
        candidate = detected_result
        if isinstance(detected_result, (list, tuple)) and detected_result:
            candidate = detected_result[0]

        final_code = candidate.strip().lower() if isinstance(candidate, str) else ""
        if not final_code:
            logger.warning(f"Detection returned invalid/empty code: '{detected_result}'. Using default.")
            return DEFAULT_LANGUAGE_CODE  # Fallback if detection is weird

        logger.info(f"Detected language: '{final_code}' for text snippet: '{snippet[:50]}...'")
        return final_code

    except (NotValidPayload, RequestError, TooManyRequests) as e:
        logger.error(f"Language detection API error: {e}", exc_info=True)
        return DEFAULT_LANGUAGE_CODE  # Fallback on API errors
    except Exception as e:
        # Deliberate best-effort: detection must never raise into the caller.
        logger.error(f"Unexpected error during language detection: {e}", exc_info=True)
        return DEFAULT_LANGUAGE_CODE  # Fallback on other errors

def translate(
    text: str,
    target_language: str, # Expect user-friendly name (e.g., "Spanish")
    source_language: str = AUTO_DETECT_INDICATOR # Expect user-friendly name or "Auto-Detect"
) -> Optional[str]:
    """
    Translates text using the deep-translator Google Translate backend.

    Args:
        text: The text to translate.
        target_language: The user-friendly target language name (e.g., "Spanish").
            Must be a key of LANGUAGE_CODES.
        source_language: The user-friendly source language name or "Auto-Detect".
            Defaults to "Auto-Detect". A name missing from LANGUAGE_CODES
            is logged and treated as "Auto-Detect".

    Returns:
        Translated text if successful (this may be an empty string if the
        backend produced one).
        The original text, unchanged, if it is empty/whitespace-only, if an
        explicitly given source resolves to the same code as the target, or if
        the backend returned None instead of a translation.
        None if the target language is unknown, the backend returned a
        non-string value, any error was raised, or the library is unavailable.
    """
    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
        # Library not imported successfully, already logged at startup.
        return None

    if not text or not text.strip():
        logger.debug("Empty text provided for translation; returning original.")
        return text

    # From here on `text` is known to contain non-whitespace characters.

    # --- Resolve Language Codes ---
    target_code = LANGUAGE_CODES.get(target_language)
    if not target_code:
        logger.error(f"Target language name '{target_language}' not found in LANGUAGE_CODES.")
        return None # Cannot proceed without valid target

    source_code = 'auto' # Default to auto-detection
    if source_language != AUTO_DETECT_INDICATOR:
        resolved_source_code = LANGUAGE_CODES.get(source_language)
        if resolved_source_code:
            source_code = resolved_source_code
        else:
            logger.warning(
                f"Source language name '{source_language}' not found in LANGUAGE_CODES. "
                f"Falling back to 'auto'."
            )
            # Keep source_code as 'auto'

    # --- Skip if Source and Target Match ---
    # (Only skip if source was explicitly provided and matches target)
    if source_code != 'auto' and source_code == target_code:
        logger.info(f"Source language ('{source_language}') and target language ('{target_language}') "
                    f"resolve to the same code ('{source_code}'); skipping translation.")
        return text

    # --- Perform Translation ---
    logger.info(f"Attempting translation from '{source_code}' (resolved from '{source_language}') "
                f"to '{target_code}' (resolved from '{target_language}'). Input length: {len(text)}")

    if len(text) > TRANSLATE_WARN_LENGTH:
        logger.warning(
            f"Translation text length ({len(text)}) exceeds threshold ({TRANSLATE_WARN_LENGTH}). "
            "This may impact performance or encounter API limits."
        )

    try:
        # Codes are lower-cased before being handed to the library.
        # NOTE(review): this turns "zh-CN" into "zh-cn"; confirm the installed
        # deep-translator version accepts the lower-cased form.
        translator = GoogleTranslator(source=source_code.lower(), target=target_code.lower())
        translated_text = translator.translate(text)

        # --- Validate Result ---
        if translated_text is None:
            # The input is non-empty here, so None means the backend produced
            # nothing usable; hand back the original text rather than failing.
            logger.warning("Translation API returned None. Input may have become empty after processing.")
            return text
        if not isinstance(translated_text, str):
            logger.error(f"Translation API returned a non-string result: {type(translated_text)}. Value: {translated_text!r}")
            return None # Indicate failure
        # A valid translation may still be an empty string for non-empty input:
        # log it, but return the result as-is.
        if not translated_text.strip():
            logger.warning("Translation resulted in an empty string for non-empty input.")

        logger.info(f"Translation successful. Output length: {len(translated_text)}")
        return translated_text

    # --- Handle Specific Translation Errors ---
    except TranslationNotFound:
        logger.error(f"Translation not found for the text between '{source_code}' and '{target_code}'.")
        return None
    except NotValidPayload as e:
        logger.error(f"Invalid payload sent to translation API: {e}", exc_info=True)
        return None
    except NotValidLength as e:
        logger.error(f"Text length issue during translation: {e}", exc_info=True)
        return None
    # WORKAROUND: BadSourceLanguage/BadTargetLanguage are intentionally not
    # caught by name; such failures fall through to the generic handler below.
    except (RequestError, TooManyRequests) as e:
        logger.error(f"API request error during translation (network issue, quota exceeded, etc.): {e}", exc_info=True)
        return None
    except Exception as e:
        # Catch any other unexpected errors from the library or logic, including potentially
        # the underlying errors that BadSource/TargetLanguage would have represented.
        logger.error(f"Unexpected error during translation: {e}", exc_info=True)
        return None

# --- Test Code (for direct execution) ---
# Manual smoke test: calls detect_language() and translate() with a handful of
# sample inputs and logs the outcomes. Runs only when the file is executed directly.
if __name__ == "__main__":
    import sys

    # Console logging at DEBUG so both INFO and DEBUG messages are visible
    logging.basicConfig(
        stream=sys.stdout,
        format='%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s',
        level=logging.DEBUG,
    )

    logger.info("--- Running Translation Module Self-Test ---")

    if DEEP_TRANSLATOR_AVAILABLE:
        # 1) Language detection on a French sentence
        french_probe = "Bonjour tout le monde! Ceci est un test."
        logger.info(f"\nTesting detection for: '{french_probe}'")
        probe_code = detect_language(french_probe)
        logger.info(f"-> Detected language code: {probe_code}")

        # 2) Explicit source language: French -> English
        french_sentence = "Le chat est assis sur le tapis."
        logger.info(f"\nTesting translation: '{french_sentence}' from French to English")
        english_result = translate(french_sentence, source_language="French", target_language="English")
        if english_result is None:
            logger.error("-> Translation failed.")
        else:
            logger.info(f"-> Translation result: '{english_result}'")

        # 3) Auto-detected source: Spanish -> German
        spanish_sentence = "Hola Mundo, cómo estás?"
        logger.info(f"\nTesting translation: '{spanish_sentence}' from Auto-Detect to German")
        german_result = translate(spanish_sentence, source_language=AUTO_DETECT_INDICATOR, target_language="German")
        if german_result is None:
            logger.error("-> Translation failed.")
        else:
            logger.info(f"-> Translation result: '{german_result}'")

        # 4) Edge case: empty input is handed back unchanged
        logger.info("\nTesting translation: Empty string")
        empty_result = translate("", source_language="English", target_language="German")
        logger.info(f"-> Translation result: '{empty_result}' (Expected: '')")

        # 5) Edge case: identical source and target skips the backend call
        logger.info("\nTesting translation: Source equals Target (English to English)")
        same_language_result = translate("Hello", source_language="English", target_language="English")
        logger.info(f"-> Translation result: '{same_language_result}' (Expected: 'Hello')")

        # 6) Edge case: a target name that is not in LANGUAGE_CODES
        logger.info("\nTesting translation: Unknown target language name ('Klingon')")
        unknown_target_result = translate("Hello", source_language="English", target_language="Klingon")
        logger.info(f"-> Translation result: {unknown_target_result} (Expected: None)")
    else:
        logger.warning("Self-test skipped: 'deep-translator' library is not available.")

    logger.info("\n--- Self-Test Complete ---")