# Source: radvisionai / translation_models.py
# Last commit: "Update translation_models.py" (c2616df, verified), author avatar: mgbam
# -*- coding: utf-8 -*-
"""
translation_models.py
Provides language detection and translation functionalities using the
'deep-translator' library (Google Translate backend).
Dependency Handling:
- This module attempts to import 'deep-translator' when loaded.
- If the import fails, a WARNING is logged once, and translation/detection
functions will return None without further error messages about the missing library.
- Ensure 'deep-translator' is installed in the correct Python environment.
WORKAROUND APPLIED: Removed import/handling of BadSourceLanguage/BadTargetLanguage
due to persistent ImportError on the platform, even when the library version seems correct.
"""
import logging
from typing import Dict, Optional, Union, Type # Added Type for exception hinting
# --- Module Logger ---
# Named after the module so applications can configure it independently.
logger = logging.getLogger(__name__)

# --- Tunable Constants ---
# Language code returned/used whenever nothing better is known.
DEFAULT_LANGUAGE_CODE: str = "en"
# UI label meaning "let the backend work out the source language".
AUTO_DETECT_INDICATOR: str = "Auto-Detect"
# Only this many leading characters are sent for language detection.
DETECT_TEXT_SNIPPET_LENGTH: int = 500
# Inputs longer than this trigger a warning before translation is attempted.
TRANSLATE_WARN_LENGTH: int = 4800
# --- Optional Dependency: deep-translator ---
# Begin in the "library missing" state. The names below are rebound to the real
# library objects only after every import in the try block has succeeded.
GoogleTranslator = None
DEEP_TRANSLATOR_AVAILABLE = False

# Placeholder exception classes: they keep the module-level names defined
# (and usable in `except` clauses) even when the import fails.
TranslationNotFound: Type[Exception] = Exception
NotValidPayload: Type[Exception] = Exception
NotValidLength: Type[Exception] = Exception
RequestError: Type[Exception] = Exception
TooManyRequests: Type[Exception] = Exception

# WORKAROUND: these two are never imported from the library and therefore stay
# bound to the base Exception class for the lifetime of the module.
# NOTE(review): the persistent ImportError described in the module docstring
# suggests the installed deep_translator.exceptions does not export these
# names -- confirm against the library version in use.
BadSourceLanguage: Type[Exception] = Exception
BadTargetLanguage: Type[Exception] = Exception

try:
    # Import under private aliases first so a failure part-way through leaves
    # the public names above untouched.
    from deep_translator import GoogleTranslator as _GoogleTranslator
    from deep_translator.exceptions import (
        TranslationNotFound as _TranslationNotFound,
        NotValidPayload as _NotValidPayload,
        NotValidLength as _NotValidLength,
        RequestError as _RequestError,
        TooManyRequests as _TooManyRequests,
    )

    # Everything imported: publish the real objects, then flip the flag.
    GoogleTranslator = _GoogleTranslator  # type: ignore
    (
        TranslationNotFound,
        NotValidPayload,
        NotValidLength,
        RequestError,
        TooManyRequests,
    ) = (
        _TranslationNotFound,
        _NotValidPayload,
        _NotValidLength,
        _RequestError,
        _TooManyRequests,
    )
    DEEP_TRANSLATOR_AVAILABLE = True
    logger.info("Successfully imported 'deep-translator' (with workaround for language exceptions). Translation features enabled.")
except ImportError as import_error:
    # Missing package or missing name: logged once here, at module load.
    # DEEP_TRANSLATOR_AVAILABLE stays False.
    logger.warning(
        f"Could not import 'deep-translator' library components. Translation features will be disabled. "
        f"Ensure it is installed in the correct environment. Error details: {import_error}"
    )
except Exception as general_error:
    # Anything else raised while importing/binding; the traceback is logged.
    logger.error(
        f"An unexpected error occurred during 'deep-translator' import/setup. "
        f"Translation features may be unstable or disabled. Error: {general_error}",
        exc_info=True
    )
# --- Supported Languages ---
# Maps the display name shown to users onto the language code handed to the
# translation backend. Insertion order is kept as-is; extend the table by
# appending new "Display Name": "code" pairs.
LANGUAGE_CODES: Dict[str, str] = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Japanese": "ja",
    "Chinese (Simplified)": "zh-CN",
    "Russian": "ru",
    "Arabic": "ar",
    "Hindi": "hi",
    "Korean": "ko",
    "Dutch": "nl",
    "Swedish": "sv",
    "Turkish": "tr",
    "Polish": "pl",
    "Vietnamese": "vi",
    "Thai": "th",
}
# --- Core Functions ---
def detect_language(text: str) -> Optional[str]:
    """
    Guess the language of ``text`` through the Google Translate backend.

    Only the first DETECT_TEXT_SNIPPET_LENGTH characters are submitted.

    Args:
        text: Text whose language should be identified.

    Returns:
        The detected language code, lowercased, on success.
        DEFAULT_LANGUAGE_CODE when the input is empty/blank, when the backend
        hands back something unusable, or when any exception is raised while
        detecting.
        None when the deep-translator library could not be imported.
    """
    library_ready = DEEP_TRANSLATOR_AVAILABLE and GoogleTranslator is not None
    if not library_ready:
        # The import failure was already reported when the module loaded.
        return None

    if not (text and text.strip()):
        logger.debug("Empty text provided for language detection; returning default lang code.")
        return DEFAULT_LANGUAGE_CODE

    try:
        # Keep the request small: a leading slice is enough for detection.
        snippet = text[:DETECT_TEXT_SNIPPET_LENGTH]
        # The target language is irrelevant for detection; use the default.
        # NOTE(review): confirm that GoogleTranslator exposes `detect` in the
        # installed deep-translator version. If it does not, the resulting
        # AttributeError is handled by the generic clause below and the
        # default code is returned.
        detected_result = GoogleTranslator(
            source='auto', target=DEFAULT_LANGUAGE_CODE
        ).detect(snippet)

        # The result is accepted either as a non-empty list (first element
        # used) or as a non-empty string; anything else counts as no result.
        if isinstance(detected_result, list) and detected_result:
            candidate = detected_result[0]
        elif isinstance(detected_result, str) and detected_result:
            candidate = detected_result
        else:
            candidate = None

        if not (candidate and isinstance(candidate, str)):
            logger.warning(f"Detection returned invalid/empty code: '{detected_result}'. Using default.")
            return DEFAULT_LANGUAGE_CODE

        final_code = candidate.lower()
        logger.info(f"Detected language: '{final_code}' for text snippet: '{snippet[:50]}...'")
        return final_code
    except (NotValidPayload, RequestError, TooManyRequests) as e:
        # Known API-level failures: log and fall back to the default code.
        logger.error(f"Language detection API error: {e}", exc_info=True)
        return DEFAULT_LANGUAGE_CODE
    except Exception as e:
        # Anything else is also non-fatal for callers.
        logger.error(f"Unexpected error during language detection: {e}", exc_info=True)
        return DEFAULT_LANGUAGE_CODE
def translate(
    text: str,
    target_language: str,  # User-friendly name (e.g., "Spanish")
    source_language: str = AUTO_DETECT_INDICATOR  # User-friendly name or "Auto-Detect"
) -> Optional[str]:
    """
    Translate ``text`` using the deep-translator Google Translate backend.

    Args:
        text: The text to translate.
        target_language: User-friendly target language name; must be a key of
            LANGUAGE_CODES (e.g., "Spanish").
        source_language: User-friendly source language name, or
            AUTO_DETECT_INDICATOR ("Auto-Detect", the default). A name that is
            not a key of LANGUAGE_CODES is logged and treated as auto-detect.

    Returns:
        The translated text on success.
        The original ``text`` unchanged when it is empty/blank, when an
        explicitly given source resolves to the same code as the target, or
        when the backend returns None for the request.
        None when the library is unavailable, the target language name is
        unknown, the backend returns a non-string, or any exception is raised
        during translation.
    """
    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
        # Library not imported successfully; already logged at module load.
        return None

    if not text or not text.strip():
        logger.debug("Empty text provided for translation; returning original.")
        return text

    # --- Resolve Language Codes ---
    target_code = LANGUAGE_CODES.get(target_language)
    if not target_code:
        logger.error(f"Target language name '{target_language}' not found in LANGUAGE_CODES.")
        return None  # Cannot proceed without a valid target

    source_code = 'auto'  # Auto-detection unless a known source name is given
    if source_language != AUTO_DETECT_INDICATOR:
        resolved_source_code = LANGUAGE_CODES.get(source_language)
        if resolved_source_code:
            source_code = resolved_source_code
        else:
            logger.warning(
                f"Source language name '{source_language}' not found in LANGUAGE_CODES. "
                f"Falling back to 'auto'."
            )

    # --- Skip if Source and Target Match ---
    # Only applies when the source was explicitly provided; with 'auto' the
    # real source language is unknown at this point.
    if source_code != 'auto' and source_code == target_code:
        logger.info(f"Source language ('{source_language}') and target language ('{target_language}') "
                    f"resolve to the same code ('{source_code}'); skipping translation.")
        return text

    # --- Perform Translation ---
    logger.info(f"Attempting translation from '{source_code}' (resolved from '{source_language}') "
                f"to '{target_code}' (resolved from '{target_language}'). Input length: {len(text)}")
    if len(text) > TRANSLATE_WARN_LENGTH:
        logger.warning(
            f"Translation text length ({len(text)}) exceeds threshold ({TRANSLATE_WARN_LENGTH}). "
            "This may impact performance or encounter API limits."
        )

    try:
        # Pass the codes exactly as stored in LANGUAGE_CODES. Region-qualified
        # codes such as "zh-CN" are case-sensitive for the backend, so
        # lowercasing them (to "zh-cn") makes the language unsupported.
        translator = GoogleTranslator(source=source_code, target=target_code)
        translated_text = translator.translate(text)

        # --- Validate Result ---
        if translated_text is None:
            logger.warning("Translation API returned None. Input may have become empty after processing.")
            # `text` is known to be non-blank here (blank input returned
            # earlier), so hand the original back rather than dropping content.
            return text

        if not isinstance(translated_text, str):
            logger.error(f"Translation API returned a non-string result: {type(translated_text)}. Value: {translated_text!r}")
            return None  # Indicate failure

        # A blank translation of non-blank input is suspicious but may be
        # legitimate: log it and still return the result.
        if not translated_text.strip():
            logger.warning("Translation resulted in an empty string for non-empty input.")

        logger.info(f"Translation successful. Output length: {len(translated_text)}")
        return translated_text

    # --- Handle Specific Translation Errors ---
    except TranslationNotFound:
        logger.error(f"Translation not found for the text between '{source_code}' and '{target_code}'.")
        return None
    except NotValidPayload as e:
        logger.error(f"Invalid payload sent to translation API: {e}", exc_info=True)
        return None
    except NotValidLength as e:
        logger.error(f"Text length issue during translation: {e}", exc_info=True)
        return None
    except (RequestError, TooManyRequests) as e:
        logger.error(f"API request error during translation (network issue, quota exceeded, etc.): {e}", exc_info=True)
        return None
    except Exception as e:
        # WORKAROUND: language-code rejections are not caught by name (the
        # BadSourceLanguage/BadTargetLanguage classes are not imported), so
        # they land here together with any other unexpected failure.
        logger.error(f"Unexpected error during translation: {e}", exc_info=True)
        return None
# --- Self-Test (runs only when this file is executed directly) ---
# Exercises language detection, two translations, and the edge cases handled by translate().
if __name__ == "__main__":
    import sys

    # Send everything (DEBUG and up) to stdout so the whole run is visible.
    logging.basicConfig(
        stream=sys.stdout,
        format='%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s',
        level=logging.DEBUG,
    )
    logger.info("--- Running Translation Module Self-Test ---")

    if DEEP_TRANSLATOR_AVAILABLE:
        # Detection on a French sentence.
        sample_text_detect = "Bonjour tout le monde! Ceci est un test."
        logger.info(f"\nTesting detection for: '{sample_text_detect}'")
        detected_lang = detect_language(sample_text_detect)
        logger.info(f"-> Detected language code: {detected_lang}")

        # Explicit source language: French -> English.
        sample_text_translate = "Le chat est assis sur le tapis."
        logger.info(f"\nTesting translation: '{sample_text_translate}' from French to English")
        translated_text = translate(sample_text_translate, target_language="English", source_language="French")
        if translated_text is None:
            logger.error("-> Translation failed.")
        else:
            logger.info(f"-> Translation result: '{translated_text}'")

        # Auto-detected source language: Spanish text -> German.
        sample_text_auto = "Hola Mundo, cómo estás?"
        logger.info(f"\nTesting translation: '{sample_text_auto}' from Auto-Detect to German")
        translated_auto = translate(sample_text_auto, target_language="German", source_language=AUTO_DETECT_INDICATOR)
        if translated_auto is None:
            logger.error("-> Translation failed.")
        else:
            logger.info(f"-> Translation result: '{translated_auto}'")

        # Edge case: empty input is handed back unchanged.
        logger.info("\nTesting translation: Empty string")
        translated_empty = translate("", target_language="German", source_language="English")
        logger.info(f"-> Translation result: '{translated_empty}' (Expected: '')")

        # Edge case: identical source and target skips the backend call.
        logger.info("\nTesting translation: Source equals Target (English to English)")
        translated_same = translate("Hello", target_language="English", source_language="English")
        logger.info(f"-> Translation result: '{translated_same}' (Expected: 'Hello')")

        # Edge case: a target name missing from LANGUAGE_CODES yields None.
        logger.info("\nTesting translation: Unknown target language name ('Klingon')")
        translated_bad_target = translate("Hello", target_language="Klingon", source_language="English")
        logger.info(f"-> Translation result: {translated_bad_target} (Expected: None)")
    else:
        logger.warning("Self-test skipped: 'deep-translator' library is not available.")

    logger.info("\n--- Self-Test Complete ---")