Spaces:
Restarting
Restarting
File size: 14,651 Bytes
14f9743 e473db8 908093b 237c2ec 908093b c2616df 14f9743 a73c69c e6670d8 908093b 14f9743 e473db8 237c2ec e473db8 908093b c2616df 908093b c2616df 908093b c2616df 14f9743 908093b c2616df e473db8 908093b c2616df e473db8 908093b c2616df 14f9743 c2616df 679fef8 c2616df 908093b c2616df 908093b e6670d8 c2616df e473db8 908093b 14f9743 a6139ac d2a4684 e6670d8 e473db8 d2a4684 14f9743 de42686 e6670d8 14f9743 237c2ec a6139ac a73c69c e473db8 870cc02 14f9743 a73c69c e6670d8 237c2ec e6670d8 e473db8 908093b 870cc02 908093b e473db8 14f9743 e6670d8 908093b e473db8 1594016 e6670d8 908093b 237c2ec 908093b e473db8 908093b e473db8 908093b e473db8 908093b 14f9743 e473db8 908093b e6670d8 908093b 14f9743 908093b 237c2ec 908093b e6670d8 237c2ec 908093b e6670d8 e473db8 908093b e473db8 398c940 237c2ec e6670d8 237c2ec e473db8 e6670d8 237c2ec 908093b 398c940 908093b e473db8 14f9743 e6670d8 908093b 237c2ec e6670d8 908093b e6670d8 908093b 14f9743 908093b e473db8 908093b 14f9743 908093b 237c2ec 14f9743 908093b e473db8 908093b e6670d8 908093b e6670d8 237c2ec 14f9743 908093b e6670d8 908093b c2616df 237c2ec 908093b 237c2ec e6670d8 237c2ec 908093b e6670d8 908093b e473db8 14f9743 908093b e473db8 14f9743 908093b 237c2ec c2616df e473db8 908093b 237c2ec e6670d8 c2616df 237c2ec e473db8 e6670d8 237c2ec c2616df e6670d8 237c2ec 908093b e473db8 908093b 237c2ec e473db8 14f9743 908093b 14f9743 908093b 14f9743 908093b 237c2ec 908093b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 |
# -*- coding: utf-8 -*-
"""
translation_models.py

Provides language detection and translation functionality using the
'deep-translator' library (Google Translate backend).

Dependency handling:
- This module attempts to import 'deep-translator' when it is loaded.
- If the import fails, a WARNING is logged once, and the translation/detection
  functions return None without further error messages about the missing library.
- Ensure 'deep-translator' is installed in the correct Python environment.

WORKAROUND APPLIED: The import and handling of BadSourceLanguage/BadTargetLanguage
were removed because of a persistent ImportError on the platform, even when the
library version seems correct.
"""
import logging
from typing import Dict, Optional, Union, Type # Added Type for exception hinting
# --- Logging Setup ---
logger = logging.getLogger(__name__)

# --- Constants ---
DEFAULT_LANGUAGE_CODE = "en"
AUTO_DETECT_INDICATOR = "Auto-Detect"
DETECT_TEXT_SNIPPET_LENGTH = 500  # Upper bound on characters sent for language detection
TRANSLATE_WARN_LENGTH = 4800  # Inputs longer than this trigger a warning before translating

# --- Dependency Import and Check ---
# Pessimistic defaults: these stay in place unless the guarded import below succeeds.
DEEP_TRANSLATOR_AVAILABLE = False
GoogleTranslator = None

# Every library exception name starts out bound to the base Exception class so the
# names always exist at module level, whether or not the library can be imported.
TranslationNotFound: Type[Exception]
NotValidPayload: Type[Exception]
NotValidLength: Type[Exception]
RequestError: Type[Exception]
TooManyRequests: Type[Exception]
# WORKAROUND: these two are never imported from the library (persistent ImportError
# on the platform), so they keep the base Exception binding permanently.
BadSourceLanguage: Type[Exception]
BadTargetLanguage: Type[Exception]

TranslationNotFound = NotValidPayload = NotValidLength = Exception
RequestError = TooManyRequests = Exception
BadSourceLanguage = BadTargetLanguage = Exception

try:
    # Import under private aliases first so the public names are only rebound
    # once every import has succeeded.
    from deep_translator import GoogleTranslator as _GoogleTranslator
    # WORKAROUND: only the exceptions known NOT to trigger the ImportError are imported.
    from deep_translator.exceptions import (
        TranslationNotFound as _TranslationNotFound,
        NotValidPayload as _NotValidPayload,
        NotValidLength as _NotValidLength,
        RequestError as _RequestError,
        TooManyRequests as _TooManyRequests
        # EXCLUDED: BadSourceLanguage, BadTargetLanguage
    )

    # All imports worked: publish the real classes and flip the availability flag.
    GoogleTranslator = _GoogleTranslator  # type: ignore
    (
        TranslationNotFound,
        NotValidPayload,
        NotValidLength,
        RequestError,
        TooManyRequests,
    ) = (
        _TranslationNotFound,
        _NotValidPayload,
        _NotValidLength,
        _RequestError,
        _TooManyRequests,
    )
    DEEP_TRANSLATOR_AVAILABLE = True
    logger.info("Successfully imported 'deep-translator' (with workaround for language exceptions). Translation features enabled.")
except ImportError as missing_dependency:
    # Reported once, at module load; DEEP_TRANSLATOR_AVAILABLE stays False.
    # NOTE: BadSourceLanguage should no longer be the cause of this, but general
    # import failures of deep_translator itself still land here.
    logger.warning(
        f"Could not import 'deep-translator' library components. Translation features will be disabled. "
        f"Ensure it is installed in the correct environment. Error details: {missing_dependency}"
    )
except Exception as setup_failure:
    # Anything else going wrong during setup; the traceback is logged and
    # DEEP_TRANSLATOR_AVAILABLE stays False.
    logger.error(
        f"An unexpected error occurred during 'deep-translator' import/setup. "
        f"Translation features may be unstable or disabled. Error: {setup_failure}",
        exc_info=True
    )
# --- Language Configuration ---
# Maps the user-friendly language name (convenient for UI integration) to the
# language code handed to the translation backend. Insertion order is kept as-is.
LANGUAGE_CODES: Dict[str, str] = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Japanese": "ja",
    # The only entry whose code contains uppercase letters.
    "Chinese (Simplified)": "zh-CN",
    "Russian": "ru",
    "Arabic": "ar",
    "Hindi": "hi",
    "Korean": "ko",
    "Dutch": "nl",
    "Swedish": "sv",
    "Turkish": "tr",
    "Polish": "pl",
    "Vietnamese": "vi",
    "Thai": "th",
    # Further languages can be appended here as needed.
}
# --- Core Functions ---
def detect_language(text: str) -> Optional[str]:
    """
    Detect the language of the input text through the deep-translator Google backend.

    Only the first DETECT_TEXT_SNIPPET_LENGTH characters of ``text`` are submitted.

    Args:
        text: The text whose language should be detected.

    Returns:
        The detected language code in lowercase (e.g., 'en', 'es') if successful.
        DEFAULT_LANGUAGE_CODE if the input is empty or whitespace-only, if the
        translator object offers no ``detect`` method, if the backend returns an
        unusable result, or if any error is raised during detection.
        None if the deep-translator library is unavailable.
    """
    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
        # Library not imported successfully; that was already logged at module load.
        return None

    if not text or not text.strip():
        logger.debug("Empty text provided for language detection; returning default lang code.")
        return DEFAULT_LANGUAGE_CODE

    try:
        # Use only a snippet for efficiency and to stay within API limits.
        snippet = text[:DETECT_TEXT_SNIPPET_LENGTH]
        # The target language is irrelevant for detection; use the default.
        translator = GoogleTranslator(source='auto', target=DEFAULT_LANGUAGE_CODE)

        # NOTE(review): GoogleTranslator does not appear to provide a `detect` method
        # in published deep-translator releases (detection seems to live in
        # `deep_translator.single_detection`, which needs an API key) -- confirm
        # against the installed version. Probing for the method keeps a missing
        # `detect` from surfacing as an AttributeError traceback on every call,
        # while still using it whenever it does exist.
        detect = getattr(translator, "detect", None)
        if not callable(detect):
            logger.warning(
                "The installed 'deep-translator' GoogleTranslator has no 'detect' method; "
                "returning default language code."
            )
            return DEFAULT_LANGUAGE_CODE

        detected_result = detect(snippet)  # May be a list or a plain string

        # Normalise the possibly varied detection result to a single code.
        lang_code: Optional[str] = None
        if isinstance(detected_result, list) and detected_result:
            lang_code = detected_result[0]
        elif isinstance(detected_result, str) and detected_result:
            lang_code = detected_result

        if not lang_code or not isinstance(lang_code, str):
            logger.warning(f"Detection returned invalid/empty code: '{detected_result}'. Using default.")
            return DEFAULT_LANGUAGE_CODE  # Fallback when the result is unusable

        final_code = lang_code.lower()
        logger.info(f"Detected language: '{final_code}' for text snippet: '{snippet[:50]}...'")
        return final_code
    except (NotValidPayload, RequestError, TooManyRequests) as e:
        logger.error(f"Language detection API error: {e}", exc_info=True)
        return DEFAULT_LANGUAGE_CODE  # Fallback on API errors
    except Exception as e:
        logger.error(f"Unexpected error during language detection: {e}", exc_info=True)
        return DEFAULT_LANGUAGE_CODE  # Fallback on any other error
def _build_google_translator(source_code: str, target_code: str):
    """
    Instantiate GoogleTranslator, tolerating either letter-case spelling of the codes.

    The lowercase spelling is tried first, which is what this module has always
    passed to the library. If the constructor rejects it and lowercasing actually
    changed one of the codes, the codes are retried exactly as configured.

    NOTE(review): some deep-translator releases appear to list Simplified Chinese as
    'zh-CN' and to reject 'zh-cn' in the constructor -- confirm against the installed
    version. The retry makes the call work under either convention.

    Args:
        source_code: Source language code, or 'auto'.
        target_code: Target language code.

    Returns:
        A GoogleTranslator instance.

    Raises:
        Whatever the GoogleTranslator constructor raises when no attempted
        spelling is accepted.
    """
    lowered_source, lowered_target = source_code.lower(), target_code.lower()
    try:
        return GoogleTranslator(source=lowered_source, target=lowered_target)
    except Exception:
        if (lowered_source, lowered_target) == (source_code, target_code):
            # Lowercasing changed nothing, so there is no alternative spelling to try.
            raise
        logger.debug(
            f"Lowercase language codes ('{lowered_source}', '{lowered_target}') were rejected; "
            f"retrying with configured codes ('{source_code}', '{target_code}')."
        )
        return GoogleTranslator(source=source_code, target=target_code)


def translate(
    text: str,
    target_language: str,  # Expect user-friendly name (e.g., "Spanish")
    source_language: str = AUTO_DETECT_INDICATOR  # Expect user-friendly name or "Auto-Detect"
) -> Optional[str]:
    """
    Translate text using the deep-translator Google Translate backend.

    Args:
        text: The text to translate.
        target_language: The user-friendly target language name (e.g., "Spanish").
            Must be a key of LANGUAGE_CODES.
        source_language: The user-friendly source language name or "Auto-Detect".
            Defaults to "Auto-Detect". A name missing from LANGUAGE_CODES falls
            back to automatic detection.

    Returns:
        The translated text if successful.
        The original text if it is empty/whitespace-only, if an explicitly given
        source resolves to the same code as the target, or if the API returns None.
        None if the target language is unknown, the translation fails, or the
        library is unavailable.
    """
    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
        # Library not imported successfully; that was already logged at module load.
        return None

    if not text or not text.strip():
        logger.debug("Empty text provided for translation; returning original.")
        return text

    # --- Resolve Language Codes ---
    target_code = LANGUAGE_CODES.get(target_language)
    if not target_code:
        logger.error(f"Target language name '{target_language}' not found in LANGUAGE_CODES.")
        return None  # Cannot proceed without a valid target

    source_code = 'auto'  # Default to auto-detection
    if source_language != AUTO_DETECT_INDICATOR:
        resolved_source_code = LANGUAGE_CODES.get(source_language)
        if resolved_source_code:
            source_code = resolved_source_code
        else:
            logger.warning(
                f"Source language name '{source_language}' not found in LANGUAGE_CODES. "
                f"Falling back to 'auto'."
            )
            # source_code stays 'auto'

    # --- Skip if Source and Target Match ---
    # Only when the source was given explicitly; 'auto' can never equal a target code here.
    if source_code != 'auto' and source_code == target_code:
        logger.info(f"Source language ('{source_language}') and target language ('{target_language}') "
                    f"resolve to the same code ('{source_code}'); skipping translation.")
        return text

    # --- Perform Translation ---
    logger.info(f"Attempting translation from '{source_code}' (resolved from '{source_language}') "
                f"to '{target_code}' (resolved from '{target_language}'). Input length: {len(text)}")

    if len(text) > TRANSLATE_WARN_LENGTH:
        logger.warning(
            f"Translation text length ({len(text)}) exceeds threshold ({TRANSLATE_WARN_LENGTH}). "
            "This may impact performance or encounter API limits."
        )

    try:
        translator = _build_google_translator(source_code, target_code)
        translated_text = translator.translate(text)

        # --- Validate Result ---
        if translated_text is None:
            logger.warning("Translation API returned None. Input may have become empty after processing.")
            # Blank input already returned early, so `text` is non-blank here:
            # hand the original back unchanged rather than reporting a failure.
            return text

        if not isinstance(translated_text, str):
            logger.error(f"Translation API returned a non-string result: {type(translated_text)}. Value: {translated_text!r}")
            return None  # Indicate failure

        # An empty translation of non-blank input is unusual but is still returned;
        # it is only logged. (`text` is known to be non-blank at this point.)
        if not translated_text.strip():
            logger.warning("Translation resulted in an empty string for non-empty input.")

        logger.info(f"Translation successful. Output length: {len(translated_text)}")
        return translated_text

    # --- Handle Specific Translation Errors ---
    except TranslationNotFound:
        logger.error(f"Translation not found for the text between '{source_code}' and '{target_code}'.")
        return None
    except NotValidPayload as e:
        logger.error(f"Invalid payload sent to translation API: {e}", exc_info=True)
        return None
    except NotValidLength as e:
        logger.error(f"Text length issue during translation: {e}", exc_info=True)
        return None
    # WORKAROUND: there is no dedicated handler for BadSourceLanguage/BadTargetLanguage;
    # such failures fall through to the generic handler at the bottom.
    except (RequestError, TooManyRequests) as e:
        logger.error(f"API request error during translation (network issue, quota exceeded, etc.): {e}", exc_info=True)
        return None
    except Exception as e:
        # Any other error from the library or this function, including rejected
        # source/target language codes.
        logger.error(f"Unexpected error during translation: {e}", exc_info=True)
        return None
# --- Test Code (for direct execution) ---
# (Self-test remains the same)
if __name__ == "__main__":
    import sys

    # Console logging at DEBUG level so every message of the self-test is visible.
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.DEBUG,
        format='%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s'
    )

    logger.info("--- Running Translation Module Self-Test ---")

    if DEEP_TRANSLATOR_AVAILABLE:
        # Detection on a French sentence.
        french_probe = "Bonjour tout le monde! Ceci est un test."
        logger.info(f"\nTesting detection for: '{french_probe}'")
        detection_outcome = detect_language(french_probe)
        logger.info(f"-> Detected language code: {detection_outcome}")

        # Explicit source language: French -> English.
        french_sentence = "Le chat est assis sur le tapis."
        logger.info(f"\nTesting translation: '{french_sentence}' from French to English")
        english_outcome = translate(french_sentence, target_language="English", source_language="French")
        if english_outcome is None:
            logger.error("-> Translation failed.")
        else:
            logger.info(f"-> Translation result: '{english_outcome}'")

        # Auto-detected source language: Spanish text -> German.
        spanish_sentence = "Hola Mundo, cómo estás?"
        logger.info(f"\nTesting translation: '{spanish_sentence}' from Auto-Detect to German")
        german_outcome = translate(spanish_sentence, target_language="German", source_language=AUTO_DETECT_INDICATOR)
        if german_outcome is None:
            logger.error("-> Translation failed.")
        else:
            logger.info(f"-> Translation result: '{german_outcome}'")

        # Edge case: empty input.
        logger.info("\nTesting translation: Empty string")
        empty_outcome = translate("", target_language="German", source_language="English")
        logger.info(f"-> Translation result: '{empty_outcome}' (Expected: '')")

        # Edge case: source and target are the same language.
        logger.info("\nTesting translation: Source equals Target (English to English)")
        same_language_outcome = translate("Hello", target_language="English", source_language="English")
        logger.info(f"-> Translation result: '{same_language_outcome}' (Expected: 'Hello')")

        # Edge case: target name that is not in LANGUAGE_CODES.
        logger.info("\nTesting translation: Unknown target language name ('Klingon')")
        unknown_target_outcome = translate("Hello", target_language="Klingon", source_language="English")
        logger.info(f"-> Translation result: {unknown_target_outcome} (Expected: None)")
    else:
        logger.warning("Self-test skipped: 'deep-translator' library is not available.")

    logger.info("\n--- Self-Test Complete ---")