# -*- coding: utf-8 -*-
"""
translation_models.py

Provides language detection and translation functionalities using the
'deep-translator' library (Google Translate backend).

Dependency handling:
- This module attempts to import 'deep-translator' when loaded.
- If the import fails, a WARNING is logged once, and the translation/detection
  functions return None without further messages about the missing library.
- Ensure 'deep-translator' is installed in the correct Python environment.

Workaround applied:
BadSourceLanguage / BadTargetLanguage are NOT imported from
deep_translator.exceptions because importing them raised a persistent
ImportError on the target platform. Both names are kept as aliases of the
base ``Exception`` so existing references to them keep working.
"""

import logging
from typing import Dict, Optional, Union, Type  # Type is used for exception hinting

# --- Logging Setup ---
logger = logging.getLogger(__name__)

# --- Constants ---
DEFAULT_LANGUAGE_CODE = "en"
AUTO_DETECT_INDICATOR = "Auto-Detect"
DETECT_TEXT_SNIPPET_LENGTH = 500  # Maximum characters sent for language detection
TRANSLATE_WARN_LENGTH = 4800  # Warn if text to translate exceeds this length

# --- Dependency Import and Check ---
DEEP_TRANSLATOR_AVAILABLE = False
GoogleTranslator = None

# Placeholders so the names always exist at module level. They are replaced
# by the real exception classes when the import below succeeds.
TranslationNotFound: Type[Exception] = Exception
NotValidPayload: Type[Exception] = Exception
NotValidLength: Type[Exception] = Exception
RequestError: Type[Exception] = Exception
TooManyRequests: Type[Exception] = Exception

# WORKAROUND: these two stay bound to the base Exception; they are never
# imported from the library (see module docstring).
# NOTE(review): the ImportError may simply mean these names do not exist in
# deep_translator.exceptions -- confirm against the installed version.
BadSourceLanguage: Type[Exception] = Exception
BadTargetLanguage: Type[Exception] = Exception

try:
    from deep_translator import GoogleTranslator as _GoogleTranslator
    # WORKAROUND: import only the exceptions known not to trigger ImportError.
    from deep_translator.exceptions import (
        TranslationNotFound as _TranslationNotFound,
        NotValidPayload as _NotValidPayload,
        NotValidLength as _NotValidLength,
        RequestError as _RequestError,
        TooManyRequests as _TooManyRequests,
    )

    # Import succeeded: publish the real objects and flip the availability flag.
    GoogleTranslator = _GoogleTranslator  # type: ignore
    TranslationNotFound = _TranslationNotFound
    NotValidPayload = _NotValidPayload
    NotValidLength = _NotValidLength
    RequestError = _RequestError
    TooManyRequests = _TooManyRequests
    DEEP_TRANSLATOR_AVAILABLE = True
    logger.info(
        "Successfully imported 'deep-translator' (with workaround for language exceptions). "
        "Translation features enabled."
    )
except ImportError as import_error:
    # Logged once, at module load; DEEP_TRANSLATOR_AVAILABLE stays False.
    logger.warning(
        "Could not import 'deep-translator' library components. "
        "Translation features will be disabled. "
        "Ensure it is installed in the correct environment. Error details: %s",
        import_error,
    )
except Exception as general_error:
    # Any other failure during import/setup; DEEP_TRANSLATOR_AVAILABLE stays False.
    logger.error(
        "An unexpected error occurred during 'deep-translator' import/setup. "
        "Translation features may be unstable or disabled. Error: %s",
        general_error,
        exc_info=True,
    )

# --- Language Configuration ---
# User-friendly names are the keys, for easier UI integration.
LANGUAGE_CODES: Dict[str, str] = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Japanese": "ja",
    "Chinese (Simplified)": "zh-CN",
    "Russian": "ru",
    "Arabic": "ar",
    "Hindi": "hi",
    "Korean": "ko",
    "Dutch": "nl",
    "Swedish": "sv",
    "Turkish": "tr",
    "Polish": "pl",
    "Vietnamese": "vi",
    "Thai": "th",
    # Add additional languages as needed
}


# --- Core Functions ---

def detect_language(text: str) -> Optional[str]:
    """
    Detect the language of the input text using the Google Translate backend.

    Args:
        text: The text to inspect. Only the first DETECT_TEXT_SNIPPET_LENGTH
            characters are passed to the backend.

    Returns:
        The detected language code in lowercase (e.g. 'en', 'es') on success.
        DEFAULT_LANGUAGE_CODE ('en') if the input is empty/blank, if the
        backend offers no detection method, or if detection fails.
        None if the deep-translator library is unavailable.
    """
    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
        # Library not imported successfully; already logged at module load.
        return None

    if not text or not text.strip():
        logger.debug("Empty text provided for language detection; returning default lang code.")
        return DEFAULT_LANGUAGE_CODE

    try:
        # Use only a snippet for efficiency and API limits.
        snippet = text[:DETECT_TEXT_SNIPPET_LENGTH]
        # The target is irrelevant for detection; the default is used.
        translator = GoogleTranslator(source='auto', target=DEFAULT_LANGUAGE_CODE)

        # NOTE(review): GoogleTranslator does not appear to expose detect() in
        # current deep-translator releases -- confirm against the installed
        # version. Guarding here avoids an AttributeError traceback per call.
        detect = getattr(translator, "detect", None)
        if not callable(detect):
            logger.warning(
                "Translator backend does not provide a detect() method; "
                "returning default language code '%s'.",
                DEFAULT_LANGUAGE_CODE,
            )
            return DEFAULT_LANGUAGE_CODE

        detected_result = detect(snippet)  # May be a list or a string

        # Normalise the possible result shapes to a single code.
        lang_code: Optional[str] = None
        if isinstance(detected_result, list) and detected_result:
            lang_code = detected_result[0]
        elif isinstance(detected_result, str) and detected_result:
            lang_code = detected_result

        if not lang_code or not isinstance(lang_code, str):
            logger.warning(
                "Detection returned invalid/empty code: '%s'. Using default.",
                detected_result,
            )
            return DEFAULT_LANGUAGE_CODE

        final_code = lang_code.lower()
        logger.info(
            "Detected language: '%s' for text snippet: '%s...'",
            final_code,
            snippet[:50],
        )
        return final_code

    except (NotValidPayload, RequestError, TooManyRequests) as e:
        logger.error("Language detection API error: %s", e, exc_info=True)
        return DEFAULT_LANGUAGE_CODE  # Fallback on API errors
    except Exception as e:
        logger.error("Unexpected error during language detection: %s", e, exc_info=True)
        return DEFAULT_LANGUAGE_CODE  # Fallback on other errors


def _build_translator(source_code: str, target_code: str):
    """
    Build a GoogleTranslator for the given codes, tolerating code-casing differences.

    The codes are first passed exactly as stored in LANGUAGE_CODES (e.g.
    'zh-CN'). If the constructor rejects them and lowercasing changes either
    code, a second attempt is made with the lowercase forms, which is what
    this module always passed before.

    Raises:
        Whatever the GoogleTranslator constructor raises when neither form is
        accepted; translate() handles it.
    """
    try:
        return GoogleTranslator(source=source_code, target=target_code)
    except Exception:
        lowered_source = source_code.lower()
        lowered_target = target_code.lower()
        if (lowered_source, lowered_target) == (source_code, target_code):
            # Lowercasing changes nothing, so a retry would fail the same way.
            raise
        logger.debug(
            "Translator rejected codes '%s' -> '%s'; retrying with lowercase forms.",
            source_code,
            target_code,
        )
        return GoogleTranslator(source=lowered_source, target=lowered_target)


def translate(
    text: str,
    target_language: str,  # User-friendly name (e.g. "Spanish")
    source_language: str = AUTO_DETECT_INDICATOR  # User-friendly name or "Auto-Detect"
) -> Optional[str]:
    """
    Translate text using the deep-translator Google Translate backend.

    Args:
        text: The text to translate.
        target_language: The user-friendly target language name (e.g. "Spanish");
            must be a key of LANGUAGE_CODES.
        source_language: The user-friendly source language name or "Auto-Detect".
            Unknown names fall back to auto-detection. Defaults to "Auto-Detect".

    Returns:
        The translated text if successful.
        The original text if it is empty/blank, if an explicitly given source
        resolves to the same code as the target, or if the API returns None.
        None if the library is unavailable, the target language name is
        unknown, the API returns a non-string, or an error occurs.
    """
    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
        # Library not imported successfully; already logged at module load.
        return None

    if not text or not text.strip():
        logger.debug("Empty text provided for translation; returning original.")
        return text

    # --- Resolve Language Codes ---
    target_code = LANGUAGE_CODES.get(target_language)
    if not target_code:
        logger.error("Target language name '%s' not found in LANGUAGE_CODES.", target_language)
        return None  # Cannot proceed without a valid target

    source_code = 'auto'  # Default to auto-detection
    if source_language != AUTO_DETECT_INDICATOR:
        resolved_source_code = LANGUAGE_CODES.get(source_language)
        if resolved_source_code:
            source_code = resolved_source_code
        else:
            logger.warning(
                "Source language name '%s' not found in LANGUAGE_CODES. "
                "Falling back to 'auto'.",
                source_language,
            )

    # --- Skip if Source and Target Match ---
    # Only when the source was given explicitly; 'auto' never equals a target code.
    if source_code != 'auto' and source_code == target_code:
        logger.info(
            "Source language ('%s') and target language ('%s') "
            "resolve to the same code ('%s'); skipping translation.",
            source_language,
            target_language,
            source_code,
        )
        return text

    # --- Perform Translation ---
    logger.info(
        "Attempting translation from '%s' (resolved from '%s') "
        "to '%s' (resolved from '%s'). Input length: %d",
        source_code,
        source_language,
        target_code,
        target_language,
        len(text),
    )
    if len(text) > TRANSLATE_WARN_LENGTH:
        logger.warning(
            "Translation text length (%d) exceeds threshold (%d). "
            "This may impact performance or encounter API limits.",
            len(text),
            TRANSLATE_WARN_LENGTH,
        )

    try:
        translator = _build_translator(source_code, target_code)
        translated_text = translator.translate(text)

        # --- Validate Result ---
        if translated_text is None:
            # `text` is known to be non-blank here, so hand back the original.
            logger.warning(
                "Translation API returned None. Input may have become empty after processing."
            )
            return text

        if not isinstance(translated_text, str):
            logger.error(
                "Translation API returned a non-string result: %s. Value: %r",
                type(translated_text),
                translated_text,
            )
            return None  # Indicate failure

        # A blank result for non-blank input is logged but still returned.
        if not translated_text.strip():
            logger.warning("Translation resulted in an empty string for non-empty input.")

        logger.info("Translation successful. Output length: %d", len(translated_text))
        return translated_text

    # --- Handle Specific Translation Errors ---
    except TranslationNotFound:
        logger.error(
            "Translation not found for the text between '%s' and '%s'.",
            source_code,
            target_code,
        )
        return None
    except NotValidPayload as e:
        logger.error("Invalid payload sent to translation API: %s", e, exc_info=True)
        return None
    except NotValidLength as e:
        logger.error("Text length issue during translation: %s", e, exc_info=True)
        return None
    except (RequestError, TooManyRequests) as e:
        logger.error(
            "API request error during translation (network issue, quota exceeded, etc.): %s",
            e,
            exc_info=True,
        )
        return None
    except Exception as e:
        # Everything else, including invalid source/target language errors that
        # have no dedicated handler because of the import workaround.
        logger.error("Unexpected error during translation: %s", e, exc_info=True)
        return None


# --- Test Code (for direct execution) ---

def _run_self_test() -> None:
    """Run a small manual smoke test, logging results to stdout."""
    import sys

    # Console logging at DEBUG so INFO and DEBUG messages are visible.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s',
        stream=sys.stdout
    )

    logger.info("--- Running Translation Module Self-Test ---")

    if not DEEP_TRANSLATOR_AVAILABLE:
        logger.warning("Self-test skipped: 'deep-translator' library is not available.")
        return

    # Detection
    sample_text_detect = "Bonjour tout le monde! Ceci est un test."
    logger.info("\nTesting detection for: '%s'", sample_text_detect)
    detected_lang = detect_language(sample_text_detect)
    logger.info("-> Detected language code: %s", detected_lang)

    # Translation (French to English)
    sample_text_translate = "Le chat est assis sur le tapis."
    logger.info("\nTesting translation: '%s' from French to English", sample_text_translate)
    translated_text = translate(
        sample_text_translate, target_language="English", source_language="French"
    )
    if translated_text is not None:
        logger.info("-> Translation result: '%s'", translated_text)
    else:
        logger.error("-> Translation failed.")

    # Translation (Auto-detect Spanish to German)
    sample_text_auto = "Hola Mundo, cómo estás?"
    logger.info("\nTesting translation: '%s' from Auto-Detect to German", sample_text_auto)
    translated_auto = translate(
        sample_text_auto, target_language="German", source_language=AUTO_DETECT_INDICATOR
    )
    if translated_auto is not None:
        logger.info("-> Translation result: '%s'", translated_auto)
    else:
        logger.error("-> Translation failed.")

    # Edge case: empty string
    logger.info("\nTesting translation: Empty string")
    translated_empty = translate("", target_language="German", source_language="English")
    logger.info("-> Translation result: '%s' (Expected: '')", translated_empty)

    # Edge case: source equals target
    logger.info("\nTesting translation: Source equals Target (English to English)")
    translated_same = translate("Hello", target_language="English", source_language="English")
    logger.info("-> Translation result: '%s' (Expected: 'Hello')", translated_same)

    # Edge case: unknown target language name
    logger.info("\nTesting translation: Unknown target language name ('Klingon')")
    translated_bad_target = translate(
        "Hello", target_language="Klingon", source_language="English"
    )
    logger.info("-> Translation result: %s (Expected: None)", translated_bad_target)

    logger.info("\n--- Self-Test Complete ---")


if __name__ == "__main__":
    _run_self_test()