File size: 5,625 Bytes
0d67af1 28c2eed fa90b1d 7a05cd7 cb15d3a 7a05cd7 78d0ff1 283a7d3 0d1a764 58568a7 f06c14e 0d67af1 f06c14e 78d0ff1 0d89ff1 7ade3be 283a7d3 78d0ff1 f06c14e d03a2fc f651d07 4524f4c 2bbc526 f06c14e 58568a7 2bbc526 f06c14e a0092fa d03a2fc 2bbc526 f06c14e cb15d3a 2bbc526 cb15d3a f06c14e 2bbc526 f651d07 2bbc526 f651d07 71d8e8b 283a7d3 f06c14e 7ade3be 2bbc526 6add4ab 7ade3be 2bbc526 7ade3be cb15d3a 2bbc526 f651d07 f06c14e 7ade3be f36740b 2bbc526 f36740b 78d0ff1 f651d07 78d0ff1 2bbc526 f06c14e 58568a7 7ade3be f36740b cb15d3a f36740b 78d0ff1 f06c14e a22d18a cb15d3a a22d18a 7ade3be 2bbc526 a22d18a cb15d3a a22d18a cb15d3a a22d18a 78d0ff1 2bbc526 a22d18a eaf7d7b 7ade3be 2bbc526 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
"""base translator class"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union
from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
InvalidSourceOrTargetLanguage,
LanguageNotSupportedException,
)
class BaseTranslator(ABC):
    """
    Abstract class that serves as a base translator for the other translators.

    It keeps the source/target language pair (normalized to language codes),
    the mapping of supported languages and the request settings, and offers
    file and batch helpers built on top of the abstract ``translate`` method.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        languages: dict = GOOGLE_LANGUAGES_TO_CODES,
        source: str = "auto",
        target: str = "en",
        payload_key: Optional[str] = None,
        element_tag: Optional[str] = None,
        element_query: Optional[dict] = None,
        **url_params,
    ):
        """
        @param base_url: base url, kept as ``self._base_url``
        @param languages: mapping of language names to their codes
        @param source: source language to translate from
        @param target: target language to translate to
        @param payload_key: kept as ``self.payload_key``
        @param element_tag: kept as ``self._element_tag``
        @param element_query: kept as ``self._element_query``
        @param url_params: extra keyword arguments, kept as ``self._url_params``
        @raise InvalidSourceOrTargetLanguage: if source or target is empty
        @raise LanguageNotSupportedException: if source or target is neither
        "auto", a known language name nor a known language code
        """
        self._base_url = base_url
        self._languages = languages
        self._supported_languages = list(self._languages.keys())
        if not source:
            raise InvalidSourceOrTargetLanguage(source)
        if not target:
            raise InvalidSourceOrTargetLanguage(target)

        # Unpacking consumes the generator right away, so an unsupported
        # language is reported here, at construction time.
        self._source, self._target = self._map_language_to_code(source, target)

        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        super().__init__()

    @property
    def source(self):
        """Source language as stored on the instance."""
        return self._source

    @source.setter
    def source(self, lang):
        # NOTE: the value is stored as given; unlike __init__, it is not
        # mapped to a language code or validated here.
        self._source = lang

    @property
    def target(self):
        """Target language as stored on the instance."""
        return self._target

    @target.setter
    def target(self, lang):
        # NOTE: the value is stored as given; unlike __init__, it is not
        # mapped to a language code or validated here.
        self._target = lang

    def _type(self):
        """Return the name of the concrete translator class."""
        return self.__class__.__name__

    def _map_language_to_code(self, *languages):
        """
        map language to its corresponding code (abbreviation) if the language was
        passed by its full name by the user

        This is a generator: values are yielded one by one and the exception
        is raised while iterating, not when the method is called.

        @param languages: list of languages
        @return: mapped value of the language or raise an exception if the language
        is not supported
        @raise LanguageNotSupportedException: if a language is not supported
        """
        for language in languages:
            if language in self._languages.values() or language == "auto":
                # Already a code (or the "auto" marker): pass it through.
                yield language
            elif language in self._languages.keys():
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(
                    language,
                    message=f"No support for the provided language.\n"
                    f"Please select on of the supported languages:\n"
                    f"{self._languages}",
                )

    def _same_source_target(self) -> bool:
        """Return True when the source and target languages are equal."""
        return self._source == self._target

    def get_supported_languages(
        self, as_dict: bool = False, **kwargs
    ) -> Union[list, dict]:
        """
        return the languages supported by this translator
        @param as_dict: if True, the languages will be returned as a dictionary
        mapping languages to their abbreviations
        @param kwargs: accepted but not used by this implementation
        @return: list or dict
        """
        return self._languages if as_dict else self._supported_languages

    def is_language_supported(self, language: str, **kwargs) -> bool:
        """
        check if the language is supported by the translator
        @param language: a string for 1 language ("auto", a language name
        or a language code)
        @param kwargs: accepted but not used by this implementation
        @return: bool
        """
        return (
            language == "auto"
            or language in self._languages.keys()
            or language in self._languages.values()
        )

    @abstractmethod
    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text using a translator under the hood and return
        the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: always, when this base implementation is
        reached (e.g. through ``super().translate(...)``)
        """
        # ``NotImplemented`` is a non-callable singleton meant for binary
        # operators; the exception type is what signals a missing override.
        raise NotImplementedError("You need to implement the translate method!")

    def _read_docx(self, f: str):
        """
        extract the text of a .docx file
        @param f: path to the file
        @return: the value returned by ``docx2txt.process``
        """
        # Imported lazily so docx2txt is only needed when a .docx is read.
        import docx2txt

        return docx2txt.process(f)

    def _read_pdf(self, f: str):
        """
        extract the text of the first page of a .pdf file
        @param f: path to the file
        @return: the text extracted from page 0; later pages are not read
        """
        # Imported lazily so pypdf is only needed when a .pdf is read.
        import pypdf

        reader = pypdf.PdfReader(f)
        page = reader.pages[0]
        return page.extract_text()

    def _translate_file(self, path: Union[str, Path], **kwargs) -> str:
        """
        translate directly from file

        ``.docx`` and ``.pdf`` files (extension matched case-insensitively) go
        through their dedicated readers; any other file is read as UTF-8 text
        and stripped of surrounding whitespace.

        @param path: path to the target file
        @type path: str or Path
        @param kwargs: additional args, forwarded to ``translate``
        @return: str
        @raise SystemExit: with exit code 1 if the path does not exist
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.exists():
            print("Path to the file is wrong!")
            # Same exception the ``exit`` helper raises, but without relying
            # on the ``site`` module having injected that helper.
            raise SystemExit(1)

        # Lower-cased so that e.g. "REPORT.PDF" is not read as plain text.
        ext = path.suffix.lower()

        if ext == ".docx":
            text = self._read_docx(f=str(path))
        elif ext == ".pdf":
            text = self._read_pdf(f=str(path))
        else:
            text = path.read_text(encoding="utf-8").strip()

        return self.translate(text, **kwargs)

    def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to every ``translate`` call
        @return: list of translations, in the same order as ``batch``
        @raise Exception: if ``batch`` is empty or None
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")
        return [self.translate(text, **kwargs) for text in batch]
|