Spaces:

mgbam
/

radvisionai

Running

App Files Files Community

radvisionai / translation_models.py

mgbam

Update translation_models.py

c2616df verified 3 days ago

raw

history blame contribute delete

14.7 kB

	# -*- coding: utf-8 -*-
	"""
	translation_models.py

	Provides language detection and translation functionalities using the
	'deep-translator' library (Google Translate backend).

	Dependency Handling:
	- This module attempts to import 'deep-translator' when loaded.
	- If the import fails, a WARNING is logged once, and translation/detection
	functions will return None without further error messages about the missing library.
	- Ensure 'deep-translator' is installed in the correct Python environment.

	WORKAROUND APPLIED: Removed import/handling of BadSourceLanguage/BadTargetLanguage
	due to persistent ImportError on the platform, even when the library version seems correct.
	"""

	import logging
	from typing import Dict, Optional, Union, Type # Added Type for exception hinting

	# --- Logging Setup ---
	# Module-scoped logger; handler and level configuration is left to the application.
	logger = logging.getLogger(__name__)

	# --- Constants ---
	DEFAULT_LANGUAGE_CODE = "en"  # Language code used as the default/fallback
	AUTO_DETECT_INDICATOR = "Auto-Detect"  # Source-language label meaning "detect automatically"
	DETECT_TEXT_SNIPPET_LENGTH = 500  # Upper bound on characters used for language detection
	TRANSLATE_WARN_LENGTH = 4800  # Inputs longer than this trigger a warning

	# --- Dependency Import and Check ---
	# Start from "library missing" defaults. Each exception name is bound to the
	# base ``Exception`` so the names always exist at module level; they are
	# replaced by the library's own classes only if the import below succeeds.
	DEEP_TRANSLATOR_AVAILABLE = False
	GoogleTranslator = None
	TranslationNotFound: Type[Exception] = Exception
	NotValidPayload: Type[Exception] = Exception
	NotValidLength: Type[Exception] = Exception
	RequestError: Type[Exception] = Exception
	TooManyRequests: Type[Exception] = Exception
	# WORKAROUND: these two are never imported (doing so raised ImportError on the
	# platform), so they permanently stay bound to the base ``Exception``.
	# NOTE(review): deep_translator.exceptions may simply not define these names
	# (its language errors appear to be called LanguageNotSupportedException /
	# InvalidSourceOrTargetLanguage) -- confirm against the installed version.
	BadSourceLanguage: Type[Exception] = Exception
	BadTargetLanguage: Type[Exception] = Exception


	try:
	    # Import under private aliases first so that a partial failure leaves
	    # the public module-level names untouched.
	    from deep_translator import GoogleTranslator as _GoogleTranslator
	    from deep_translator.exceptions import (
	        TranslationNotFound as _TranslationNotFound,
	        NotValidPayload as _NotValidPayload,
	        NotValidLength as _NotValidLength,
	        RequestError as _RequestError,
	        TooManyRequests as _TooManyRequests,
	    )

	    # Everything imported: publish the real objects and flip the flag.
	    GoogleTranslator = _GoogleTranslator  # type: ignore
	    (
	        TranslationNotFound,
	        NotValidPayload,
	        NotValidLength,
	        RequestError,
	        TooManyRequests,
	    ) = (
	        _TranslationNotFound,
	        _NotValidPayload,
	        _NotValidLength,
	        _RequestError,
	        _TooManyRequests,
	    )
	    DEEP_TRANSLATOR_AVAILABLE = True
	    logger.info("Successfully imported 'deep-translator' (with workaround for language exceptions). Translation features enabled.")

	except ImportError as exc:
	    # Library (or one of the imported names) is missing: warn once at load
	    # time and leave DEEP_TRANSLATOR_AVAILABLE as False.
	    logger.warning(
	        f"Could not import 'deep-translator' library components. Translation features will be disabled. "
	        f"Ensure it is installed in the correct environment. Error details: {exc}"
	    )

	except Exception as exc:
	    # Any other failure while importing/setting up; keep the traceback and
	    # leave DEEP_TRANSLATOR_AVAILABLE as False.
	    logger.error(
	        f"An unexpected error occurred during 'deep-translator' import/setup. "
	        f"Translation features may be unstable or disabled. Error: {exc}",
	        exc_info=True,
	    )


	# --- Language Configuration ---
	# Maps the user-friendly language name (as shown in a UI) to the language
	# code handed to the translator. Insertion order is preserved.
	LANGUAGE_CODES: Dict[str, str] = {
	    "English": "en",
	    "Spanish": "es",
	    "French": "fr",
	    "German": "de",
	    "Italian": "it",
	    "Portuguese": "pt",
	    "Japanese": "ja",
	    "Chinese (Simplified)": "zh-CN",
	    "Russian": "ru",
	    "Arabic": "ar",
	    "Hindi": "hi",
	    "Korean": "ko",
	    "Dutch": "nl",
	    "Swedish": "sv",
	    "Turkish": "tr",
	    "Polish": "pl",
	    "Vietnamese": "vi",
	    "Thai": "th",
	    # Add additional languages as needed
	}

	# --- Core Functions ---

	def detect_language(text: str) -> Optional[str]:
	    """
	    Detect the language of the input text through the translator backend.

	    Only the first DETECT_TEXT_SNIPPET_LENGTH characters of ``text`` are
	    submitted for detection.

	    Args:
	        text: The text whose language should be identified.

	    Returns:
	        The detected language code in lowercase (e.g., 'en', 'es') on success.
	        DEFAULT_LANGUAGE_CODE if the input is empty or whitespace-only, if the
	        translator object offers no ``detect`` method, if detection yields no
	        usable code, or if any error occurs during detection.
	        None if the deep-translator library is unavailable.
	    """
	    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
	        # Library not imported successfully; this was already logged at load time.
	        return None

	    if not text or not text.strip():
	        logger.debug("Empty text provided for language detection; returning default lang code.")
	        return DEFAULT_LANGUAGE_CODE

	    try:
	        # A snippet is enough for detection and keeps the request small.
	        snippet = text[:DETECT_TEXT_SNIPPET_LENGTH]
	        # The target language is irrelevant for detection; use the default.
	        translator = GoogleTranslator(source='auto', target=DEFAULT_LANGUAGE_CODE)

	        # NOTE(review): GoogleTranslator does not appear to expose a `detect`
	        # method in current deep-translator releases (detection seems to live
	        # in `deep_translator.detection` and to need an API key) -- confirm.
	        # Check explicitly so a missing method is reported as what it is
	        # instead of surfacing as an "unexpected" AttributeError traceback.
	        detect = getattr(translator, "detect", None)
	        if not callable(detect):
	            logger.warning(
	                "Translator backend does not provide a 'detect' method; "
	                "language detection is unavailable. Using default."
	            )
	            return DEFAULT_LANGUAGE_CODE

	        detected_result = detect(snippet)  # May be a list or a plain string

	        # Normalize the two accepted result shapes to a single candidate.
	        lang_code = None
	        if isinstance(detected_result, list) and detected_result:
	            lang_code = detected_result[0]
	        elif isinstance(detected_result, str) and detected_result:
	            lang_code = detected_result

	        # Reject non-string and blank (including whitespace-only) codes.
	        if not isinstance(lang_code, str) or not lang_code.strip():
	            logger.warning(f"Detection returned invalid/empty code: '{detected_result}'. Using default.")
	            return DEFAULT_LANGUAGE_CODE

	        final_code = lang_code.strip().lower()
	        logger.info(f"Detected language: '{final_code}' for text snippet: '{snippet[:50]}...'")
	        return final_code

	    except (NotValidPayload, RequestError, TooManyRequests) as e:
	        logger.error(f"Language detection API error: {e}", exc_info=True)
	        return DEFAULT_LANGUAGE_CODE  # Fallback on API errors
	    except Exception as e:
	        # Best-effort boundary: detection must never raise into the caller.
	        logger.error(f"Unexpected error during language detection: {e}", exc_info=True)
	        return DEFAULT_LANGUAGE_CODE  # Fallback on other errors

	def translate(
	    text: str,
	    target_language: str,  # Expect user-friendly name (e.g., "Spanish")
	    source_language: str = AUTO_DETECT_INDICATOR,  # User-friendly name or "Auto-Detect"
	) -> Optional[str]:
	    """
	    Translate text using the deep-translator Google Translate backend.

	    Args:
	        text: The text to translate.
	        target_language: The user-friendly target language name (a key of
	            LANGUAGE_CODES, e.g., "Spanish").
	        source_language: The user-friendly source language name or
	            "Auto-Detect". Unknown names fall back to automatic detection.
	            Defaults to "Auto-Detect".

	    Returns:
	        The translated text if successful.
	        The original text if it is empty/whitespace-only, if an explicitly
	        given source language resolves to the same code as the target, or if
	        the backend returns None.
	        None if the target language name is unknown, the backend returns a
	        non-string value, translation raises an error, or the library is
	        unavailable.
	    """
	    if not DEEP_TRANSLATOR_AVAILABLE or GoogleTranslator is None:
	        # Library not imported successfully; this was already logged at load time.
	        return None

	    if not text or not text.strip():
	        logger.debug("Empty text provided for translation; returning original.")
	        return text

	    # --- Resolve Language Codes ---
	    target_code = LANGUAGE_CODES.get(target_language)
	    if not target_code:
	        logger.error(f"Target language name '{target_language}' not found in LANGUAGE_CODES.")
	        return None  # Cannot proceed without a valid target

	    source_code = 'auto'  # Default to auto-detection
	    if source_language != AUTO_DETECT_INDICATOR:
	        resolved_source_code = LANGUAGE_CODES.get(source_language)
	        if resolved_source_code:
	            source_code = resolved_source_code
	        else:
	            # Unknown source name: keep 'auto' rather than failing.
	            logger.warning(
	                f"Source language name '{source_language}' not found in LANGUAGE_CODES. "
	                f"Falling back to 'auto'."
	            )

	    # --- Skip if Source and Target Match ---
	    # Only when the source was given explicitly; 'auto' never equals a target code.
	    if source_code != 'auto' and source_code == target_code:
	        logger.info(f"Source language ('{source_language}') and target language ('{target_language}') "
	                    f"resolve to the same code ('{source_code}'); skipping translation.")
	        return text

	    # --- Perform Translation ---
	    logger.info(f"Attempting translation from '{source_code}' (resolved from '{source_language}') "
	                f"to '{target_code}' (resolved from '{target_language}'). Input length: {len(text)}")

	    # NOTE(review): the backend is believed to reject inputs beyond roughly
	    # 5000 characters -- confirm. This threshold only warns, it does not truncate.
	    if len(text) > TRANSLATE_WARN_LENGTH:
	        logger.warning(
	            f"Translation text length ({len(text)}) exceeds threshold ({TRANSLATE_WARN_LENGTH}). "
	            "This may impact performance or encounter API limits."
	        )

	    try:
	        # Pass the codes exactly as declared in LANGUAGE_CODES. Lower-casing
	        # them would turn "zh-CN" into "zh-cn", which is not among the
	        # library's (case-sensitive) supported codes; every other entry is
	        # already lowercase, so this is a no-op for them.
	        translator = GoogleTranslator(source=source_code, target=target_code)
	        translated_text = translator.translate(text)

	        # --- Validate Result ---
	        if translated_text is None:
	            # `text` is known to be non-blank here (see the guard above), so
	            # hand back the original rather than an empty result.
	            logger.warning("Translation API returned None. Input may have become empty after processing.")
	            return text
	        if not isinstance(translated_text, str):
	            logger.error(f"Translation API returned a non-string result: {type(translated_text)}. Value: {translated_text!r}")
	            return None  # Indicate failure
	        # A blank translation of non-blank input is unusual: log it, but
	        # still return what the backend produced.
	        if not translated_text.strip():
	            logger.warning("Translation resulted in an empty string for non-empty input.")

	        logger.info(f"Translation successful. Output length: {len(translated_text)}")
	        return translated_text

	    # --- Handle Specific Translation Errors ---
	    except TranslationNotFound:
	        logger.error(f"Translation not found for the text between '{source_code}' and '{target_code}'.")
	        return None
	    except NotValidPayload as e:
	        logger.error(f"Invalid payload sent to translation API: {e}", exc_info=True)
	        return None
	    except NotValidLength as e:
	        logger.error(f"Text length issue during translation: {e}", exc_info=True)
	        return None
	    except (RequestError, TooManyRequests) as e:
	        logger.error(f"API request error during translation (network issue, quota exceeded, etc.): {e}", exc_info=True)
	        return None
	    except Exception as e:
	        # Best-effort boundary: also covers unsupported-language errors, which
	        # are not caught by a dedicated handler (see the module-level workaround).
	        logger.error(f"Unexpected error during translation: {e}", exc_info=True)
	        return None

	# --- Test Code (for direct execution) ---
	# (Exercises detect_language() and translate() when the module is run as a script.)
	if __name__ == "__main__":
	    import sys

	    # Send DEBUG-and-above records to stdout so the self-test output is visible.
	    logging.basicConfig(
	        stream=sys.stdout,
	        level=logging.DEBUG,
	        format='%(asctime)s - %(levelname)s - [%(name)s:%(lineno)d] - %(message)s',
	    )

	    def _report_translation(result: Optional[str]) -> None:
	        """Log a translation outcome; ``None`` is reported as a failure."""
	        if result is None:
	            logger.error("-> Translation failed.")
	        else:
	            logger.info(f"-> Translation result: '{result}'")

	    logger.info("--- Running Translation Module Self-Test ---")

	    if DEEP_TRANSLATOR_AVAILABLE:
	        # Language detection
	        french_greeting = "Bonjour tout le monde! Ceci est un test."
	        logger.info(f"\nTesting detection for: '{french_greeting}'")
	        detected_code = detect_language(french_greeting)
	        logger.info(f"-> Detected language code: {detected_code}")

	        # Explicit source language: French -> English
	        french_sentence = "Le chat est assis sur le tapis."
	        logger.info(f"\nTesting translation: '{french_sentence}' from French to English")
	        _report_translation(
	            translate(french_sentence, target_language="English", source_language="French")
	        )

	        # Auto-detected source language: Spanish -> German
	        spanish_sentence = "Hola Mundo, cómo estás?"
	        logger.info(f"\nTesting translation: '{spanish_sentence}' from Auto-Detect to German")
	        _report_translation(
	            translate(spanish_sentence, target_language="German", source_language=AUTO_DETECT_INDICATOR)
	        )

	        # Edge case: empty input is returned unchanged
	        logger.info(f"\nTesting translation: Empty string")
	        empty_result = translate("", target_language="German", source_language="English")
	        logger.info(f"-> Translation result: '{empty_result}' (Expected: '')")

	        # Edge case: identical source and target skip the backend call
	        logger.info(f"\nTesting translation: Source equals Target (English to English)")
	        same_language_result = translate("Hello", target_language="English", source_language="English")
	        logger.info(f"-> Translation result: '{same_language_result}' (Expected: 'Hello')")

	        # Edge case: a target name missing from LANGUAGE_CODES yields None
	        logger.info(f"\nTesting translation: Unknown target language name ('Klingon')")
	        unknown_target_result = translate("Hello", target_language="Klingon", source_language="English")
	        logger.info(f"-> Translation result: {unknown_target_result} (Expected: None)")
	    else:
	        logger.warning("Self-test skipped: 'deep-translator' library is not available.")

	    logger.info("\n--- Self-Test Complete ---")