File size: 5,625 Bytes
0d67af1 28c2eed fa90b1d 7a05cd7 cb15d3a 7a05cd7 78d0ff1 283a7d3 0d1a764 58568a7 f06c14e 0d67af1 f06c14e 78d0ff1 0d89ff1 7ade3be 283a7d3 78d0ff1 f06c14e d03a2fc f651d07 4524f4c 2bbc526 f06c14e 58568a7 2bbc526 f06c14e a0092fa d03a2fc 2bbc526 f06c14e cb15d3a 2bbc526 cb15d3a f06c14e 2bbc526 f651d07 2bbc526 f651d07 71d8e8b 283a7d3 f06c14e 7ade3be 2bbc526 6add4ab 7ade3be 2bbc526 7ade3be cb15d3a 2bbc526 f651d07 f06c14e 7ade3be f36740b 2bbc526 f36740b 78d0ff1 f651d07 78d0ff1 2bbc526 f06c14e 58568a7 7ade3be f36740b cb15d3a f36740b 78d0ff1 f06c14e a22d18a cb15d3a a22d18a 7ade3be 2bbc526 a22d18a cb15d3a a22d18a cb15d3a a22d18a 78d0ff1 2bbc526 a22d18a eaf7d7b 7ade3be 2bbc526 |
|
"""base translator class"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union
from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
InvalidSourceOrTargetLanguage,
LanguageNotSupportedException,
)
class BaseTranslator(ABC):
    """
    Abstract class that serves as a base translator for the concrete translators.

    It keeps the configuration shared by all translators (base url, supported
    languages, source/target language codes, extra url parameters) and builds
    file and batch translation helpers on top of the abstract ``translate``
    method that every subclass has to implement.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        languages: dict = GOOGLE_LANGUAGES_TO_CODES,
        source: str = "auto",
        target: str = "en",
        payload_key: Optional[str] = None,
        element_tag: Optional[str] = None,
        element_query: Optional[dict] = None,
        **url_params,
    ):
        """
        @param base_url: url of the translation service, stored as ``_base_url``
        @param languages: dict mapping language names to their codes
        @param source: source language to translate from (name, code or "auto")
        @param target: target language to translate to (name, code or "auto")
        @param payload_key: stored on the instance; not used by this class itself
        @param element_tag: stored on the instance; not used by this class itself
        @param element_query: stored on the instance; not used by this class itself
        @param url_params: extra keyword arguments, stored as ``_url_params``
        @raise InvalidSourceOrTargetLanguage: if source or target is empty
        @raise LanguageNotSupportedException: if source or target is unknown
        """
        self._base_url = base_url
        self._languages = languages
        self._supported_languages = list(self._languages.keys())

        if not source:
            raise InvalidSourceOrTargetLanguage(source)
        if not target:
            raise InvalidSourceOrTargetLanguage(target)

        # _map_language_to_code is a generator: the unpacking below is what
        # actually drives it, so unsupported languages are reported here.
        self._source, self._target = self._map_language_to_code(source, target)

        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        super().__init__()

    @property
    def source(self):
        """
        @return: the current source language as stored on the instance
        """
        return self._source

    @source.setter
    def source(self, lang):
        # The value is stored as given; no mapping or validation happens here.
        self._source = lang

    @property
    def target(self):
        """
        @return: the current target language as stored on the instance
        """
        return self._target

    @target.setter
    def target(self, lang):
        # The value is stored as given; no mapping or validation happens here.
        self._target = lang

    def _type(self):
        """
        @return: name of the concrete translator class
        """
        return self.__class__.__name__

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was
        passed by its full name by the user

        This is a generator: nothing is checked until the result is iterated.

        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language
        is not supported
        @raise LanguageNotSupportedException: for a language that is neither a known
        name, a known code nor "auto"
        """
        for language in languages:
            if language in self._languages.values() or language == "auto":
                # already a code (or the special "auto" value): pass it through
                yield language
            elif language in self._languages:
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(
                    language,
                    message=f"No support for the provided language.\n"
                    f"Please select on of the supported languages:\n"
                    f"{self._languages}",
                )

    def _same_source_target(self) -> bool:
        """
        @return: True if the source and the target language are identical
        """
        return self._source == self._target

    def get_supported_languages(
        self, as_dict: bool = False, **kwargs
    ) -> Union[list, dict]:
        """
        return the languages supported by this translator
        @param as_dict: if True, the languages will be returned as a dictionary
        mapping languages to their abbreviations
        @return: list or dict
        """
        return self._languages if as_dict else self._supported_languages

    def is_language_supported(self, language: str, **kwargs) -> bool:
        """
        check if the language is supported by the translator
        @param language: a string for 1 language (name, code or "auto")
        @return: bool
        """
        return (
            language == "auto"
            or language in self._languages
            or language in self._languages.values()
        )

    @abstractmethod
    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text using a translator under the hood and return
        the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: if a subclass delegates to this base method
        """
        # NotImplemented is a non-callable constant meant for binary operators;
        # the exception class is the right way to signal a missing override.
        raise NotImplementedError("You need to implement the translate method!")

    def _read_docx(self, f: str):
        """
        extract the text of a .docx file
        @param f: path to the .docx file
        @return: whatever docx2txt.process returns for the file
        """
        # imported lazily so docx2txt is only needed when a .docx is translated
        import docx2txt

        return docx2txt.process(f)

    def _read_pdf(self, f: str):
        """
        extract the text of the first page of a .pdf file
        @param f: path to the .pdf file
        @return: text extracted from the first page only
        """
        # imported lazily so pypdf is only needed when a .pdf is translated
        import pypdf

        reader = pypdf.PdfReader(f)
        # NOTE: only page 0 is read; the remaining pages are ignored.
        page = reader.pages[0]
        return page.extract_text()

    def _translate_file(self, path: str, **kwargs) -> str:
        """
        translate directly from file
        @param path: path to the target file
        @type path: str
        @param kwargs: additional args, forwarded to ``translate``
        @return: str
        @raise FileNotFoundError: if the path does not exist
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.exists():
            # Raise instead of print + exit(1): a library must not terminate
            # the interpreter of the application that is using it.
            raise FileNotFoundError(f"Path to the file is wrong! ({path})")

        ext = path.suffix
        if ext == ".docx":
            text = self._read_docx(f=str(path))
        elif ext == ".pdf":
            text = self._read_pdf(f=str(path))
        else:
            # every other extension is treated as utf-8 encoded plain text
            with open(path, "r", encoding="utf-8") as f:
                text = f.read().strip()

        return self.translate(text, **kwargs)

    def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to every ``translate`` call
        @return: list of translations, in the same order as the input
        @raise Exception: if the batch is empty or None
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")

        return [self.translate(text, **kwargs) for text in batch]
|