nidhal baccouri
committed on
Commit
·
2bbc526
1
Parent(s):
0d67af1
huge refactoring
Browse files
- deep_translator/__init__.py +0 -5
- deep_translator/base.py +59 -34
- deep_translator/cli.py +4 -3
- deep_translator/constants.py +0 -10
- deep_translator/deepl.py +16 -27
- deep_translator/engine.py +0 -39
- deep_translator/google_trans.py +17 -119
- deep_translator/libre.py +16 -65
- deep_translator/linguee.py +10 -47
- deep_translator/microsoft.py +27 -76
- deep_translator/mymemory.py +9 -94
- deep_translator/papago.py +9 -84
- deep_translator/pons.py +7 -41
- deep_translator/qcri.py +15 -19
- deep_translator/validate.py +29 -0
- deep_translator/yandex.py +9 -22
deep_translator/__init__.py
CHANGED
@@ -11,7 +11,6 @@ from .detection import single_detection, batch_detection
|
|
11 |
from .microsoft import MicrosoftTranslator
|
12 |
from .papago import PapagoTranslator
|
13 |
from .libre import LibreTranslator
|
14 |
-
from .engine import generate_engines_dict, engine
|
15 |
|
16 |
__author__ = """Nidhal Baccouri"""
|
17 |
__email__ = '[email protected]'
|
@@ -31,7 +30,3 @@ __all__ = [
|
|
31 |
"single_detection",
|
32 |
"batch_detection"
|
33 |
]
|
34 |
-
|
35 |
-
__engines__ = generate_engines_dict(__all__, locals())
|
36 |
-
del generate_engines_dict
|
37 |
-
engine.translation_engines = __engines__
|
|
|
11 |
from .microsoft import MicrosoftTranslator
|
12 |
from .papago import PapagoTranslator
|
13 |
from .libre import LibreTranslator
|
|
|
14 |
|
15 |
__author__ = """Nidhal Baccouri"""
|
16 |
__email__ = '[email protected]'
|
|
|
30 |
"single_detection",
|
31 |
"batch_detection"
|
32 |
]
|
|
|
|
|
|
|
|
deep_translator/base.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
"""base translator class"""
|
2 |
|
3 |
-
from .
|
4 |
from abc import ABC, abstractmethod
|
5 |
-
import string
|
6 |
|
7 |
|
8 |
class BaseTranslator(ABC):
|
@@ -16,55 +15,55 @@ class BaseTranslator(ABC):
|
|
16 |
payload_key=None,
|
17 |
element_tag=None,
|
18 |
element_query=None,
|
|
|
19 |
**url_params):
|
20 |
"""
|
21 |
@param source: source language to translate from
|
22 |
@param target: target language to translate to
|
23 |
"""
|
24 |
-
if source == target:
|
25 |
-
raise InvalidSourceOrTargetLanguage(source)
|
26 |
-
|
27 |
self.__base_url = base_url
|
28 |
-
self._source = source
|
29 |
-
self._target = target
|
30 |
self._url_params = url_params
|
31 |
self._element_tag = element_tag
|
32 |
self._element_query = element_query
|
33 |
self.payload_key = payload_key
|
34 |
-
self.
|
35 |
-
|
36 |
-
|
37 |
-
super(BaseTranslator, self).__init__()
|
38 |
|
39 |
-
|
40 |
-
def _validate_payload(payload, min_chars=1, max_chars=5000):
|
41 |
"""
|
42 |
-
|
43 |
-
@param
|
44 |
-
@return:
|
45 |
"""
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
# check if payload contains only symbols
|
51 |
-
if all(i in string.punctuation for i in payload):
|
52 |
-
raise NotValidPayload(payload)
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
return
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
def __check_length(payload, min_chars, max_chars):
|
60 |
"""
|
61 |
-
check
|
62 |
-
@param
|
63 |
-
@
|
64 |
-
@param max_chars: maximum characters allowed
|
65 |
-
@return: bool
|
66 |
"""
|
67 |
-
|
|
|
|
|
|
|
68 |
|
69 |
@abstractmethod
|
70 |
def translate(self, text, **kwargs):
|
@@ -76,5 +75,31 @@ class BaseTranslator(ABC):
|
|
76 |
"""
|
77 |
return NotImplemented('You need to implement the translate method!')
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
"""base translator class"""
|
2 |
|
3 |
+
from .constants import GOOGLE_LANGUAGES_TO_CODES
|
4 |
from abc import ABC, abstractmethod
|
|
|
5 |
|
6 |
|
7 |
class BaseTranslator(ABC):
|
|
|
15 |
payload_key=None,
|
16 |
element_tag=None,
|
17 |
element_query=None,
|
18 |
+
languages=GOOGLE_LANGUAGES_TO_CODES,
|
19 |
**url_params):
|
20 |
"""
|
21 |
@param source: source language to translate from
|
22 |
@param target: target language to translate to
|
23 |
"""
|
|
|
|
|
|
|
24 |
self.__base_url = base_url
|
25 |
+
self._source, self._target = self._map_language_to_code(source, target)
|
|
|
26 |
self._url_params = url_params
|
27 |
self._element_tag = element_tag
|
28 |
self._element_query = element_query
|
29 |
self.payload_key = payload_key
|
30 |
+
self.languages: dict = languages
|
31 |
+
self.supported_languages: list = list(self.languages.keys())
|
32 |
+
super().__init__()
|
|
|
33 |
|
34 |
+
def _map_language_to_code(self, *languages):
|
|
|
35 |
"""
|
36 |
+
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
37 |
+
@param languages: list of languages
|
38 |
+
@return: mapped value of the language or raise an exception if the language is not supported
|
39 |
"""
|
40 |
+
for language in languages:
|
41 |
+
if language in self.languages.values() or language == 'auto':
|
42 |
+
yield language
|
43 |
+
elif language in self.languages.keys():
|
44 |
+
yield self.languages[language]
|
45 |
|
46 |
+
def _same_source_target(self):
|
47 |
+
return self._source == self._target
|
|
|
|
|
|
|
|
|
48 |
|
49 |
+
def get_supported_languages(self, as_dict=False, **kwargs):
|
50 |
+
"""
|
51 |
+
return the supported languages by the google translator
|
52 |
+
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
53 |
+
@return: list or dict
|
54 |
+
"""
|
55 |
+
return self.supported_languages if not as_dict else self.languages
|
56 |
|
57 |
+
def is_language_supported(self, language, **kwargs):
|
|
|
58 |
"""
|
59 |
+
check if the language is supported by the translator
|
60 |
+
@param language: a string for 1 language
|
61 |
+
@return: bool or raise an Exception
|
|
|
|
|
62 |
"""
|
63 |
+
if language == 'auto' or language in self.languages.keys() or language in self.languages.values():
|
64 |
+
return True
|
65 |
+
else:
|
66 |
+
return False
|
67 |
|
68 |
@abstractmethod
|
69 |
def translate(self, text, **kwargs):
|
|
|
75 |
"""
|
76 |
return NotImplemented('You need to implement the translate method!')
|
77 |
|
78 |
+
def _translate_file(self, path, **kwargs):
|
79 |
+
"""
|
80 |
+
translate directly from file
|
81 |
+
@param path: path to the target file
|
82 |
+
@type path: str
|
83 |
+
@param kwargs: additional args
|
84 |
+
@return: str
|
85 |
+
"""
|
86 |
+
try:
|
87 |
+
with open(path, 'r', encoding='utf-8') as f:
|
88 |
+
text = f.read().strip()
|
89 |
+
return self.translate(text)
|
90 |
+
except Exception as e:
|
91 |
+
raise e
|
92 |
|
93 |
+
def _translate_batch(self, batch=None, **kwargs):
|
94 |
+
"""
|
95 |
+
translate a list of texts
|
96 |
+
@param batch: list of texts you want to translate
|
97 |
+
@return: list of translations
|
98 |
+
"""
|
99 |
+
if not batch:
|
100 |
+
raise Exception("Enter your text list that you want to translate")
|
101 |
+
arr = []
|
102 |
+
for i, text in enumerate(batch):
|
103 |
+
translated = self.translate(text, **kwargs)
|
104 |
+
arr.append(translated)
|
105 |
+
return arr
|
deep_translator/cli.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
"""Console script for deep_translator."""
|
2 |
-
from . import __engines__
|
3 |
import argparse
|
|
|
4 |
|
5 |
|
6 |
class CLI(object):
|
7 |
-
translators_dict =
|
8 |
translator = None
|
9 |
|
10 |
def __init__(self, custom_args=None):
|
11 |
self.custom_args = custom_args
|
12 |
self.args = self.parse_args()
|
13 |
-
|
|
|
14 |
translator_class = self.translators_dict.get(self.args.translator, None)
|
15 |
if not translator_class:
|
16 |
raise Exception(f"Translator {self.args.translator} is not supported."
|
|
|
1 |
"""Console script for deep_translator."""
|
|
|
2 |
import argparse
|
3 |
+
from .base import BaseTranslator
|
4 |
|
5 |
|
6 |
class CLI(object):
|
7 |
+
translators_dict = BaseTranslator.__subclasses__()
|
8 |
translator = None
|
9 |
|
10 |
def __init__(self, custom_args=None):
|
11 |
self.custom_args = custom_args
|
12 |
self.args = self.parse_args()
|
13 |
+
print(f'translators_dict: {self.translators_dict}')
|
14 |
+
exit()
|
15 |
translator_class = self.translators_dict.get(self.args.translator, None)
|
16 |
if not translator_class:
|
17 |
raise Exception(f"Translator {self.args.translator} is not supported."
|
deep_translator/constants.py
CHANGED
@@ -197,16 +197,6 @@ LINGUEE_LANGUAGES_TO_CODES = {
|
|
197 |
|
198 |
LINGUEE_CODE_TO_LANGUAGE = {v: k for k, v in LINGUEE_LANGUAGES_TO_CODES.items()}
|
199 |
|
200 |
-
# "72e9e2cc7c992db4dcbdd6fb9f91a0d1"
|
201 |
-
|
202 |
-
# obtaining the current list of supported Microsoft languages for translation
|
203 |
-
|
204 |
-
microsoft_languages_api_url = "https://api.cognitive.microsofttranslator.com/languages?api-version=3.0&scope=translation"
|
205 |
-
microsoft_languages_response = requests.get(microsoft_languages_api_url)
|
206 |
-
translation_dict = microsoft_languages_response.json()['translation']
|
207 |
-
|
208 |
-
MICROSOFT_CODES_TO_LANGUAGES = {translation_dict[k]['name'].lower(): k for k in translation_dict.keys()}
|
209 |
-
|
210 |
DEEPL_LANGUAGE_TO_CODE = {
|
211 |
"bulgarian": "bg",
|
212 |
"czech": "cs",
|
|
|
197 |
|
198 |
LINGUEE_CODE_TO_LANGUAGE = {v: k for k, v in LINGUEE_LANGUAGES_TO_CODES.items()}
|
199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
DEEPL_LANGUAGE_TO_CODE = {
|
201 |
"bulgarian": "bg",
|
202 |
"czech": "cs",
|
deep_translator/deepl.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1 |
import requests
|
|
|
|
|
2 |
from .constants import BASE_URLS, DEEPL_LANGUAGE_TO_CODE
|
3 |
from .exceptions import (ServerException,
|
4 |
TranslationNotFound,
|
5 |
-
LanguageNotSupportedException,
|
6 |
AuthorizationException)
|
7 |
from .base import BaseTranslator
|
8 |
|
9 |
|
10 |
-
class DeepL(
|
11 |
"""
|
12 |
class that wraps functions, which use the DeepL translator under the hood to translate word(s)
|
13 |
"""
|
14 |
-
_languages = DEEPL_LANGUAGE_TO_CODE
|
15 |
|
16 |
-
def __init__(self, api_key=None, source="
|
17 |
"""
|
18 |
@param api_key: your DeepL api key.
|
19 |
Get one here: https://www.deepl.com/docs-api/accessing-the-api/
|
@@ -24,26 +24,30 @@ class DeepL(object):
|
|
24 |
raise ServerException(401)
|
25 |
self.version = 'v2'
|
26 |
self.api_key = api_key
|
27 |
-
self.source = self._map_language_to_code(source)
|
28 |
-
self.target = self._map_language_to_code(target)
|
29 |
if use_free_api:
|
30 |
self.__base_url = BASE_URLS.get(
|
31 |
"DEEPL_FREE").format(version=self.version)
|
32 |
else:
|
33 |
self.__base_url = BASE_URLS.get(
|
34 |
"DEEPL").format(version=self.version)
|
|
|
|
|
|
|
35 |
|
36 |
def translate(self, text, **kwargs):
|
37 |
"""
|
38 |
@param text: text to translate
|
39 |
@return: translated text
|
40 |
"""
|
|
|
|
|
|
|
41 |
# Create the request parameters.
|
42 |
translate_endpoint = 'translate'
|
43 |
params = {
|
44 |
"auth_key": self.api_key,
|
45 |
-
"source_lang": self.
|
46 |
-
"target_lang": self.
|
47 |
"text": text
|
48 |
}
|
49 |
# Do the request and check the connection.
|
@@ -64,30 +68,15 @@ class DeepL(object):
|
|
64 |
# Process and return the response.
|
65 |
return res['translations'][0]['text']
|
66 |
|
|
|
|
|
|
|
67 |
def translate_batch(self, batch, **kwargs):
|
68 |
"""
|
69 |
@param batch: list of texts to translate
|
70 |
@return: list of translations
|
71 |
"""
|
72 |
-
return
|
73 |
-
|
74 |
-
@staticmethod
|
75 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
76 |
-
return [*DeepL._languages.keys()] if not as_dict else DeepL._languages
|
77 |
-
|
78 |
-
def _is_language_supported(self, lang, **kwargs):
|
79 |
-
# The language is supported when is in the dicionary.
|
80 |
-
return lang == 'auto' or lang in self._languages.keys() or lang in self._languages.values()
|
81 |
-
|
82 |
-
def _map_language_to_code(self, lang, **kwargs):
|
83 |
-
if lang in self._languages.keys():
|
84 |
-
return self._languages[lang]
|
85 |
-
elif lang in self._languages.values():
|
86 |
-
return lang
|
87 |
-
raise LanguageNotSupportedException(lang)
|
88 |
-
|
89 |
-
|
90 |
-
BaseTranslator.register(DeepL)
|
91 |
|
92 |
|
93 |
if __name__ == '__main__':
|
|
|
1 |
import requests
|
2 |
+
|
3 |
+
from validate import is_empty
|
4 |
from .constants import BASE_URLS, DEEPL_LANGUAGE_TO_CODE
|
5 |
from .exceptions import (ServerException,
|
6 |
TranslationNotFound,
|
|
|
7 |
AuthorizationException)
|
8 |
from .base import BaseTranslator
|
9 |
|
10 |
|
11 |
+
class DeepL(BaseTranslator):
|
12 |
"""
|
13 |
class that wraps functions, which use the DeepL translator under the hood to translate word(s)
|
14 |
"""
|
|
|
15 |
|
16 |
+
def __init__(self, api_key=None, source="de", target="en", use_free_api=True, **kwargs):
|
17 |
"""
|
18 |
@param api_key: your DeepL api key.
|
19 |
Get one here: https://www.deepl.com/docs-api/accessing-the-api/
|
|
|
24 |
raise ServerException(401)
|
25 |
self.version = 'v2'
|
26 |
self.api_key = api_key
|
|
|
|
|
27 |
if use_free_api:
|
28 |
self.__base_url = BASE_URLS.get(
|
29 |
"DEEPL_FREE").format(version=self.version)
|
30 |
else:
|
31 |
self.__base_url = BASE_URLS.get(
|
32 |
"DEEPL").format(version=self.version)
|
33 |
+
super().__init__(source=source,
|
34 |
+
target=target,
|
35 |
+
languages=DEEPL_LANGUAGE_TO_CODE)
|
36 |
|
37 |
def translate(self, text, **kwargs):
|
38 |
"""
|
39 |
@param text: text to translate
|
40 |
@return: translated text
|
41 |
"""
|
42 |
+
if self._same_source_target() or is_empty(text):
|
43 |
+
return text
|
44 |
+
|
45 |
# Create the request parameters.
|
46 |
translate_endpoint = 'translate'
|
47 |
params = {
|
48 |
"auth_key": self.api_key,
|
49 |
+
"source_lang": self._source,
|
50 |
+
"target_lang": self._target,
|
51 |
"text": text
|
52 |
}
|
53 |
# Do the request and check the connection.
|
|
|
68 |
# Process and return the response.
|
69 |
return res['translations'][0]['text']
|
70 |
|
71 |
+
def translate_file(self, path, **kwargs):
|
72 |
+
return self._translate_file(path, **kwargs)
|
73 |
+
|
74 |
def translate_batch(self, batch, **kwargs):
|
75 |
"""
|
76 |
@param batch: list of texts to translate
|
77 |
@return: list of translations
|
78 |
"""
|
79 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
|
82 |
if __name__ == '__main__':
|
deep_translator/engine.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
from .base import BaseTranslator
|
2 |
-
|
3 |
-
|
4 |
-
def generate_engines_dict(_all: list, _locals: dict) -> dict:
|
5 |
-
base_translator_type = BaseTranslator
|
6 |
-
|
7 |
-
def is_translator(__object) -> bool:
|
8 |
-
try:
|
9 |
-
return issubclass(__object, base_translator_type)
|
10 |
-
except TypeError:
|
11 |
-
return False
|
12 |
-
|
13 |
-
translation_engines = {}
|
14 |
-
for _object in _all:
|
15 |
-
__object = _locals.get(_object, 'failed')
|
16 |
-
key_name = _object.replace('Translator', '').lower()
|
17 |
-
if is_translator(__object):
|
18 |
-
translation_engines.update({key_name: __object})
|
19 |
-
return translation_engines
|
20 |
-
|
21 |
-
|
22 |
-
def engine(engine_name: str, *args, **kwargs) -> BaseTranslator:
|
23 |
-
"""Return translation engine.
|
24 |
-
|
25 |
-
Free and keyless engines are 'google', 'pons', 'linguee', 'mymemory',
|
26 |
-
'libre'.
|
27 |
-
|
28 |
-
Args:
|
29 |
-
engine_name: the name of the engine
|
30 |
-
*args: positional argument to pass to the engine
|
31 |
-
**kwargs: named argument to pass to the engine
|
32 |
-
Return:
|
33 |
-
A translation engine
|
34 |
-
"""
|
35 |
-
try:
|
36 |
-
return engine.translation_engines[engine_name.lower()](*args, **kwargs)
|
37 |
-
except KeyError:
|
38 |
-
keys = '\', \''.join(engine.translation_engines.keys())
|
39 |
-
raise(KeyError(f'Please provide a valid engine name (\'{keys}\')'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
deep_translator/google_trans.py
CHANGED
@@ -2,22 +2,18 @@
|
|
2 |
google translator API
|
3 |
"""
|
4 |
|
5 |
-
from .constants import BASE_URLS
|
6 |
-
from .exceptions import TooManyRequests,
|
7 |
from .base import BaseTranslator
|
|
|
8 |
from bs4 import BeautifulSoup
|
9 |
import requests
|
10 |
-
from time import sleep
|
11 |
-
import warnings
|
12 |
-
import logging
|
13 |
|
14 |
|
15 |
class GoogleTranslator(BaseTranslator):
|
16 |
"""
|
17 |
class that wraps functions, which use google translate under the hood to translate text(s)
|
18 |
"""
|
19 |
-
_languages = GOOGLE_LANGUAGES_TO_CODES
|
20 |
-
supported_languages = list(_languages.keys())
|
21 |
|
22 |
def __init__(self, source="auto", target="en", proxies=None, **kwargs):
|
23 |
"""
|
@@ -26,88 +22,28 @@ class GoogleTranslator(BaseTranslator):
|
|
26 |
"""
|
27 |
self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
|
28 |
self.proxies = proxies
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
target_lower=target.lower()
|
39 |
-
#######################################
|
40 |
-
|
41 |
-
if self.is_language_supported(source_lower, target_lower):
|
42 |
-
self._source, self._target = self._map_language_to_code(source_lower, target_lower)
|
43 |
-
|
44 |
-
super(GoogleTranslator, self).__init__(base_url=self.__base_url,
|
45 |
-
source=self._source,
|
46 |
-
target=self._target,
|
47 |
-
element_tag='div',
|
48 |
-
element_query={"class": "t0"},
|
49 |
-
payload_key='q', # key of text in the url
|
50 |
-
tl=self._target,
|
51 |
-
sl=self._source,
|
52 |
-
**kwargs)
|
53 |
|
54 |
self._alt_element_query = {"class": "result-container"}
|
55 |
|
56 |
-
@staticmethod
|
57 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
58 |
-
"""
|
59 |
-
return the supported languages by the google translator
|
60 |
-
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
61 |
-
@return: list or dict
|
62 |
-
"""
|
63 |
-
return GoogleTranslator.supported_languages if not as_dict else GoogleTranslator._languages
|
64 |
-
|
65 |
-
def is_secondary(self, lang):
|
66 |
-
"""
|
67 |
-
Function to check if lang is a secondary name of any primary language
|
68 |
-
@param lang: language name
|
69 |
-
@return: primary name of a language if found otherwise False
|
70 |
-
"""
|
71 |
-
for primary_name, secondary_names in GOOGLE_LANGUAGES_SECONDARY_NAMES.items():
|
72 |
-
if lang in secondary_names:
|
73 |
-
return primary_name
|
74 |
-
return False
|
75 |
-
|
76 |
-
def _map_language_to_code(self, *languages):
|
77 |
-
"""
|
78 |
-
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
79 |
-
@param languages: list of languages
|
80 |
-
@return: mapped value of the language or raise an exception if the language is not supported
|
81 |
-
"""
|
82 |
-
for language in languages:
|
83 |
-
if language in self._languages.values() or language == 'auto':
|
84 |
-
yield language
|
85 |
-
elif language in self._languages.keys():
|
86 |
-
yield self._languages[language]
|
87 |
-
else:
|
88 |
-
yield self._languages[self.is_secondary(language)]
|
89 |
-
|
90 |
-
def is_language_supported(self, *languages):
|
91 |
-
"""
|
92 |
-
check if the language is supported by the translator
|
93 |
-
@param languages: list of languages
|
94 |
-
@return: bool or raise an Exception
|
95 |
-
"""
|
96 |
-
for lang in languages:
|
97 |
-
if lang != 'auto' and lang not in self._languages.keys():
|
98 |
-
if lang != 'auto' and lang not in self._languages.values():
|
99 |
-
if not self.is_secondary(lang):
|
100 |
-
raise LanguageNotSupportedException(lang)
|
101 |
-
return True
|
102 |
-
|
103 |
def translate(self, text, **kwargs):
|
104 |
"""
|
105 |
function that uses google translate to translate a text
|
106 |
@param text: desired text to translate
|
107 |
@return: str: translated text
|
108 |
"""
|
|
|
|
|
109 |
|
110 |
-
if
|
111 |
text = text.strip()
|
112 |
|
113 |
if self.payload_key:
|
@@ -151,37 +87,7 @@ class GoogleTranslator(BaseTranslator):
|
|
151 |
@param kwargs: additional args
|
152 |
@return: str
|
153 |
"""
|
154 |
-
|
155 |
-
with open(path, 'r', encoding='utf-8') as f:
|
156 |
-
text = f.read().strip()
|
157 |
-
return self.translate(text)
|
158 |
-
except Exception as e:
|
159 |
-
raise e
|
160 |
-
|
161 |
-
def translate_sentences(self, sentences=None, **kwargs):
|
162 |
-
"""
|
163 |
-
translate many sentences together. This makes sense if you have sentences with different languages
|
164 |
-
and you want to translate all to unified language. This is handy because it detects
|
165 |
-
automatically the language of each sentence and then translate it.
|
166 |
-
|
167 |
-
@param sentences: list of sentences to translate
|
168 |
-
@return: list of all translated sentences
|
169 |
-
"""
|
170 |
-
warnings.warn("deprecated. Use the translate_batch function instead", DeprecationWarning, stacklevel=2)
|
171 |
-
logging.warning("deprecated. Use the translate_batch function instead")
|
172 |
-
if not sentences:
|
173 |
-
raise NotValidPayload(sentences)
|
174 |
-
|
175 |
-
translated_sentences = []
|
176 |
-
try:
|
177 |
-
for sentence in sentences:
|
178 |
-
translated = self.translate(text=sentence)
|
179 |
-
translated_sentences.append(translated)
|
180 |
-
|
181 |
-
return translated_sentences
|
182 |
-
|
183 |
-
except Exception as e:
|
184 |
-
raise e
|
185 |
|
186 |
def translate_batch(self, batch=None, **kwargs):
|
187 |
"""
|
@@ -189,12 +95,4 @@ class GoogleTranslator(BaseTranslator):
|
|
189 |
@param batch: list of texts you want to translate
|
190 |
@return: list of translations
|
191 |
"""
|
192 |
-
|
193 |
-
raise Exception("Enter your text list that you want to translate")
|
194 |
-
arr = []
|
195 |
-
for i, text in enumerate(batch):
|
196 |
-
|
197 |
-
translated = self.translate(text, **kwargs)
|
198 |
-
arr.append(translated)
|
199 |
-
return arr
|
200 |
-
|
|
|
2 |
google translator API
|
3 |
"""
|
4 |
|
5 |
+
from .constants import BASE_URLS
|
6 |
+
from .exceptions import TooManyRequests, TranslationNotFound, RequestError
|
7 |
from .base import BaseTranslator
|
8 |
+
from .validate import validate_input, is_empty
|
9 |
from bs4 import BeautifulSoup
|
10 |
import requests
|
|
|
|
|
|
|
11 |
|
12 |
|
13 |
class GoogleTranslator(BaseTranslator):
|
14 |
"""
|
15 |
class that wraps functions, which use google translate under the hood to translate text(s)
|
16 |
"""
|
|
|
|
|
17 |
|
18 |
def __init__(self, source="auto", target="en", proxies=None, **kwargs):
|
19 |
"""
|
|
|
22 |
"""
|
23 |
self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
|
24 |
self.proxies = proxies
|
25 |
+
super().__init__(base_url=self.__base_url,
|
26 |
+
source=source,
|
27 |
+
target=target,
|
28 |
+
element_tag='div',
|
29 |
+
element_query={"class": "t0"},
|
30 |
+
payload_key='q', # key of text in the url
|
31 |
+
tl=self._target,
|
32 |
+
sl=self._source,
|
33 |
+
**kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
self._alt_element_query = {"class": "result-container"}
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
def translate(self, text, **kwargs):
|
38 |
"""
|
39 |
function that uses google translate to translate a text
|
40 |
@param text: desired text to translate
|
41 |
@return: str: translated text
|
42 |
"""
|
43 |
+
if self._same_source_target() or is_empty(text):
|
44 |
+
return text
|
45 |
|
46 |
+
if validate_input(text):
|
47 |
text = text.strip()
|
48 |
|
49 |
if self.payload_key:
|
|
|
87 |
@param kwargs: additional args
|
88 |
@return: str
|
89 |
"""
|
90 |
+
return self._translate_file(path, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
def translate_batch(self, batch=None, **kwargs):
|
93 |
"""
|
|
|
95 |
@param batch: list of texts you want to translate
|
96 |
@return: list of translations
|
97 |
"""
|
98 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
deep_translator/libre.py
CHANGED
@@ -3,71 +3,34 @@ LibreTranslate API
|
|
3 |
"""
|
4 |
|
5 |
import requests
|
|
|
|
|
6 |
from .base import BaseTranslator
|
7 |
-
from .constants import BASE_URLS,LIBRE_LANGUAGES_TO_CODES
|
8 |
from .exceptions import (ServerException,
|
9 |
TranslationNotFound,
|
10 |
-
|
11 |
-
AuthorizationException,
|
12 |
-
NotValidPayload)
|
13 |
|
14 |
|
15 |
class LibreTranslator(BaseTranslator):
|
16 |
"""
|
17 |
class that wraps functions, which use libre translator under the hood to translate text(s)
|
18 |
"""
|
19 |
-
_languages = LIBRE_LANGUAGES_TO_CODES
|
20 |
-
supported_languages = list(_languages.keys())
|
21 |
|
22 |
-
def __init__(self,source="auto", target="en",
|
23 |
"""
|
24 |
@param source: source language to translate from
|
25 |
List of LibreTranslate nedpoints can be found at : https://github.com/LibreTranslate/LibreTranslate#mirrors
|
26 |
Some require an API key
|
27 |
@param target: target language to translate to
|
28 |
"""
|
29 |
-
if
|
30 |
raise ServerException(401)
|
31 |
-
self.__base_url =
|
32 |
self.api_key = api_key
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
self.source = self._map_language_to_code(source)
|
37 |
-
self.target = self._map_language_to_code(target)
|
38 |
-
|
39 |
-
|
40 |
-
@staticmethod
|
41 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
42 |
-
"""
|
43 |
-
return the supported languages by the libre translator
|
44 |
-
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
45 |
-
@return: list or dict
|
46 |
-
"""
|
47 |
-
return [*LibreTranslator._languages.keys()] if not as_dict else LibreTranslator._languages
|
48 |
-
|
49 |
-
def _map_language_to_code(self, language, **kwargs):
|
50 |
-
"""
|
51 |
-
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
52 |
-
@param language: a string for 1 language
|
53 |
-
@return: mapped value of the language or raise an exception if the language is not supported
|
54 |
-
"""
|
55 |
-
if language in self._languages.keys():
|
56 |
-
return self._languages[language]
|
57 |
-
elif language in self._languages.values():
|
58 |
-
return language
|
59 |
-
raise LanguageNotSupportedException(language)
|
60 |
-
|
61 |
-
def _is_language_supported(self, language, **kwargs):
|
62 |
-
"""
|
63 |
-
check if the language is supported by the translator
|
64 |
-
@param language: a string for 1 language
|
65 |
-
@return: bool or raise an Exception
|
66 |
-
"""
|
67 |
-
if language == 'auto' or language in self._languages.keys() or language in self._languages.values():
|
68 |
-
return True
|
69 |
-
else:
|
70 |
-
raise LanguageNotSupportedException(language)
|
71 |
|
72 |
def translate(self, text, **kwargs):
|
73 |
"""
|
@@ -75,15 +38,14 @@ class LibreTranslator(BaseTranslator):
|
|
75 |
@param text: desired text to translate
|
76 |
@return: str: translated text
|
77 |
"""
|
78 |
-
|
79 |
-
|
80 |
-
raise NotValidPayload(text)
|
81 |
|
82 |
translate_endpoint = 'translate'
|
83 |
params = {
|
84 |
"q": text,
|
85 |
-
"source": self.
|
86 |
-
"target": self.
|
87 |
"format": 'text'
|
88 |
}
|
89 |
# Add API Key if required
|
@@ -115,12 +77,7 @@ class LibreTranslator(BaseTranslator):
|
|
115 |
@param kwargs: additional args
|
116 |
@return: str
|
117 |
"""
|
118 |
-
|
119 |
-
with open(path, 'r', encoding='utf-8') as f:
|
120 |
-
text = f.read().strip()
|
121 |
-
return self.translate(text)
|
122 |
-
except Exception as e:
|
123 |
-
raise e
|
124 |
|
125 |
def translate_batch(self, batch=None, **kwargs):
|
126 |
"""
|
@@ -128,10 +85,4 @@ class LibreTranslator(BaseTranslator):
|
|
128 |
@param batch: list of texts you want to translate
|
129 |
@return: list of translations
|
130 |
"""
|
131 |
-
|
132 |
-
raise Exception("Enter your text list that you want to translate")
|
133 |
-
arr = []
|
134 |
-
for i, text in enumerate(batch):
|
135 |
-
translated = self.translate(text, **kwargs)
|
136 |
-
arr.append(translated)
|
137 |
-
return arr
|
|
|
3 |
"""
|
4 |
|
5 |
import requests
|
6 |
+
|
7 |
+
from validate import is_empty
|
8 |
from .base import BaseTranslator
|
9 |
+
from .constants import BASE_URLS,LIBRE_LANGUAGES_TO_CODES
|
10 |
from .exceptions import (ServerException,
|
11 |
TranslationNotFound,
|
12 |
+
AuthorizationException)
|
|
|
|
|
13 |
|
14 |
|
15 |
class LibreTranslator(BaseTranslator):
|
16 |
"""
|
17 |
class that wraps functions, which use libre translator under the hood to translate text(s)
|
18 |
"""
|
|
|
|
|
19 |
|
20 |
+
def __init__(self, source="auto", target="en", api_key=None, **kwargs):
|
21 |
"""
|
22 |
@param source: source language to translate from
|
23 |
List of LibreTranslate nedpoints can be found at : https://github.com/LibreTranslate/LibreTranslate#mirrors
|
24 |
Some require an API key
|
25 |
@param target: target language to translate to
|
26 |
"""
|
27 |
+
if not api_key:
|
28 |
raise ServerException(401)
|
29 |
+
self.__base_url = BASE_URLS.get("LIBRE")
|
30 |
self.api_key = api_key
|
31 |
+
super().__init__(source=source,
|
32 |
+
target=target,
|
33 |
+
languages=LIBRE_LANGUAGES_TO_CODES)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
def translate(self, text, **kwargs):
|
36 |
"""
|
|
|
38 |
@param text: desired text to translate
|
39 |
@return: str: translated text
|
40 |
"""
|
41 |
+
if self._same_source_target() or is_empty(text):
|
42 |
+
return text
|
|
|
43 |
|
44 |
translate_endpoint = 'translate'
|
45 |
params = {
|
46 |
"q": text,
|
47 |
+
"source": self._source,
|
48 |
+
"target": self._target,
|
49 |
"format": 'text'
|
50 |
}
|
51 |
# Add API Key if required
|
|
|
77 |
@param kwargs: additional args
|
78 |
@return: str
|
79 |
"""
|
80 |
+
return self._translate_file(path, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
def translate_batch(self, batch=None, **kwargs):
|
83 |
"""
|
|
|
85 |
@param batch: list of texts you want to translate
|
86 |
@return: list of translations
|
87 |
"""
|
88 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
deep_translator/linguee.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
"""
|
2 |
linguee translator API
|
3 |
"""
|
4 |
-
|
5 |
-
from .constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES
|
6 |
-
from .exceptions import (
|
7 |
TranslationNotFound,
|
8 |
NotValidPayload,
|
9 |
ElementNotFoundInGetRequest,
|
@@ -19,8 +19,6 @@ class LingueeTranslator(BaseTranslator):
|
|
19 |
"""
|
20 |
class that wraps functions, which use the linguee translator under the hood to translate word(s)
|
21 |
"""
|
22 |
-
_languages = LINGUEE_LANGUAGES_TO_CODES
|
23 |
-
supported_languages = list(_languages.keys())
|
24 |
|
25 |
def __init__(self, source, target="en", proxies=None, **kwargs):
|
26 |
"""
|
@@ -29,53 +27,15 @@ class LingueeTranslator(BaseTranslator):
|
|
29 |
"""
|
30 |
self.__base_url = BASE_URLS.get("LINGUEE")
|
31 |
self.proxies = proxies
|
32 |
-
|
33 |
-
if self.is_language_supported(source, target):
|
34 |
-
self._source, self._target = self._map_language_to_code(source.lower(), target.lower())
|
35 |
-
|
36 |
super().__init__(base_url=self.__base_url,
|
37 |
-
source=
|
38 |
-
target=
|
|
|
39 |
element_tag='a',
|
40 |
element_query={'class': 'dictLink featured'},
|
41 |
payload_key=None, # key of text in the url
|
42 |
)
|
43 |
|
44 |
-
@staticmethod
|
45 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
46 |
-
"""
|
47 |
-
return the supported languages by the linguee translator
|
48 |
-
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
49 |
-
@return: list or dict
|
50 |
-
"""
|
51 |
-
return LingueeTranslator.supported_languages if not as_dict else LingueeTranslator._languages
|
52 |
-
|
53 |
-
def _map_language_to_code(self, *languages, **kwargs):
|
54 |
-
"""
|
55 |
-
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
56 |
-
@param languages: list of languages
|
57 |
-
@return: mapped value of the language or raise an exception if the language is not supported
|
58 |
-
"""
|
59 |
-
for language in languages:
|
60 |
-
if language in self._languages.values():
|
61 |
-
yield LINGUEE_CODE_TO_LANGUAGE[language]
|
62 |
-
elif language in self._languages.keys():
|
63 |
-
yield language
|
64 |
-
else:
|
65 |
-
raise LanguageNotSupportedException(language)
|
66 |
-
|
67 |
-
def is_language_supported(self, *languages, **kwargs):
|
68 |
-
"""
|
69 |
-
check if the language is supported by the translator
|
70 |
-
@param languages: list of languages
|
71 |
-
@return: bool or raise an Exception
|
72 |
-
"""
|
73 |
-
for lang in languages:
|
74 |
-
if lang not in self._languages.keys():
|
75 |
-
if lang not in self._languages.values():
|
76 |
-
raise LanguageNotSupportedException(lang)
|
77 |
-
return True
|
78 |
-
|
79 |
def translate(self, word, return_all=False, **kwargs):
|
80 |
"""
|
81 |
function that uses linguee to translate a word
|
@@ -85,7 +45,10 @@ class LingueeTranslator(BaseTranslator):
|
|
85 |
@type return_all: bool
|
86 |
@return: str: translated word
|
87 |
"""
|
88 |
-
if self.
|
|
|
|
|
|
|
89 |
# %s-%s/translation/%s.html
|
90 |
url = "{}{}-{}/translation/{}.html".format(self.__base_url, self._source, self._target, word)
|
91 |
url = requote_uri(url)
|
|
|
1 |
"""
|
2 |
linguee translator API
|
3 |
"""
|
4 |
+
from validate import validate_input, is_empty
|
5 |
+
from .constants import BASE_URLS, LINGUEE_LANGUAGES_TO_CODES
|
6 |
+
from .exceptions import (
|
7 |
TranslationNotFound,
|
8 |
NotValidPayload,
|
9 |
ElementNotFoundInGetRequest,
|
|
|
19 |
"""
|
20 |
class that wraps functions, which use the linguee translator under the hood to translate word(s)
|
21 |
"""
|
|
|
|
|
22 |
|
23 |
def __init__(self, source, target="en", proxies=None, **kwargs):
|
24 |
"""
|
|
|
27 |
"""
|
28 |
self.__base_url = BASE_URLS.get("LINGUEE")
|
29 |
self.proxies = proxies
|
|
|
|
|
|
|
|
|
30 |
super().__init__(base_url=self.__base_url,
|
31 |
+
source=source,
|
32 |
+
target=target,
|
33 |
+
languages=LINGUEE_LANGUAGES_TO_CODES,
|
34 |
element_tag='a',
|
35 |
element_query={'class': 'dictLink featured'},
|
36 |
payload_key=None, # key of text in the url
|
37 |
)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
def translate(self, word, return_all=False, **kwargs):
|
40 |
"""
|
41 |
function that uses linguee to translate a word
|
|
|
45 |
@type return_all: bool
|
46 |
@return: str: translated word
|
47 |
"""
|
48 |
+
if self._same_source_target() or is_empty(word):
|
49 |
+
return word
|
50 |
+
|
51 |
+
if validate_input(word, max_chars=50):
|
52 |
# %s-%s/translation/%s.html
|
53 |
url = "{}{}-{}/translation/{}.html".format(self.__base_url, self._source, self._target, word)
|
54 |
url = requote_uri(url)
|
deep_translator/microsoft.py
CHANGED
@@ -3,31 +3,29 @@
|
|
3 |
import requests
|
4 |
import logging
|
5 |
import sys
|
6 |
-
|
7 |
-
from .
|
8 |
-
from .exceptions import LanguageNotSupportedException, ServerException, MicrosoftAPIerror
|
9 |
from .base import BaseTranslator
|
10 |
|
11 |
|
12 |
-
class MicrosoftTranslator:
|
13 |
"""
|
14 |
the class that wraps functions, which use the Microsoft translator under the hood to translate word(s)
|
15 |
"""
|
16 |
|
17 |
-
_languages = MICROSOFT_CODES_TO_LANGUAGES
|
18 |
-
supported_languages = list(_languages.values())
|
19 |
-
|
20 |
def __init__(self, api_key=None, region=None, source=None, target=None, proxies=None, **kwargs):
|
21 |
"""
|
22 |
@params api_key and target are the required params
|
23 |
@param api_key: your Microsoft API key
|
24 |
@param region: your Microsoft Location
|
25 |
"""
|
|
|
|
|
|
|
26 |
if not api_key:
|
27 |
raise ServerException(401)
|
28 |
-
else:
|
29 |
-
self.api_key = api_key
|
30 |
|
|
|
31 |
self.proxies = proxies
|
32 |
self.headers = {
|
33 |
"Ocp-Apim-Subscription-Key": self.api_key,
|
@@ -37,65 +35,22 @@ class MicrosoftTranslator:
|
|
37 |
if region:
|
38 |
self.region = region
|
39 |
self.headers["Ocp-Apim-Subscription-Region"] = self.region
|
40 |
-
|
41 |
-
if not target:
|
42 |
-
raise ServerException(401)
|
43 |
-
else:
|
44 |
-
if type(target) is str:
|
45 |
-
self.target = target.lower()
|
46 |
-
else:
|
47 |
-
self.target = [i.lower() for i in target]
|
48 |
-
if self.is_language_supported(self.target):
|
49 |
-
self.target = self._map_language_to_code(self.target)
|
50 |
-
|
51 |
-
self.url_params = {'to': self.target, **kwargs}
|
52 |
-
|
53 |
-
if source:
|
54 |
-
self.source = source.lower()
|
55 |
-
if self.is_language_supported(self.source):
|
56 |
-
self.source = self._map_language_to_code(self.source)
|
57 |
-
self.url_params['from'] = self.source
|
58 |
-
|
59 |
self.__base_url = BASE_URLS.get("MICROSOFT_TRANSLATE")
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
63 |
-
"""
|
64 |
-
return the languages supported by the microsoft translator
|
65 |
-
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
66 |
-
@return: list or dict
|
67 |
-
"""
|
68 |
-
return MicrosoftTranslator.supported_languages if not as_dict else MicrosoftTranslator._languages
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
if type(language) is str:
|
77 |
-
language = [language]
|
78 |
-
for lang in language:
|
79 |
-
if lang in self._languages.values():
|
80 |
-
yield lang
|
81 |
-
elif lang in self._languages.keys():
|
82 |
-
yield self._languages[lang]
|
83 |
-
else:
|
84 |
-
raise LanguageNotSupportedException(lang)
|
85 |
-
|
86 |
-
def is_language_supported(self, language, **kwargs):
|
87 |
-
"""
|
88 |
-
check if the language is supported by the translator
|
89 |
-
@param language: a string (if 1 lang) or a list (if multiple langs)
|
90 |
-
@return: bool or raise an Exception
|
91 |
-
"""
|
92 |
-
if type(language) is str:
|
93 |
-
language = [language]
|
94 |
-
for lang in language:
|
95 |
-
if lang not in self._languages.keys():
|
96 |
-
if lang not in self._languages.values():
|
97 |
-
raise LanguageNotSupportedException(lang)
|
98 |
-
return True
|
99 |
|
100 |
def translate(self, text, **kwargs):
|
101 |
"""
|
@@ -105,10 +60,14 @@ class MicrosoftTranslator:
|
|
105 |
"""
|
106 |
# a body must be a list of dicts to process multiple texts;
|
107 |
# I have not added multiple text processing here since it is covered by the translate_batch method
|
|
|
|
|
|
|
|
|
108 |
valid_microsoft_json = [{'text': text}]
|
109 |
try:
|
110 |
requested = requests.post(self.__base_url,
|
111 |
-
params=self.
|
112 |
headers=self.headers,
|
113 |
json=valid_microsoft_json,
|
114 |
proxies=self.proxies)
|
@@ -132,12 +91,7 @@ class MicrosoftTranslator:
|
|
132 |
@param path: path to file
|
133 |
@return: translated text
|
134 |
"""
|
135 |
-
|
136 |
-
with open(path, 'r', encoding='utf-8') as f:
|
137 |
-
text = f.read().strip()
|
138 |
-
return self.translate(text)
|
139 |
-
except Exception as e:
|
140 |
-
raise e
|
141 |
|
142 |
def translate_batch(self, batch, **kwargs):
|
143 |
"""
|
@@ -145,7 +99,4 @@ class MicrosoftTranslator:
|
|
145 |
@param batch: list of texts to translate
|
146 |
@return: list of translations
|
147 |
"""
|
148 |
-
return
|
149 |
-
|
150 |
-
|
151 |
-
BaseTranslator.register(MicrosoftTranslator)
|
|
|
3 |
import requests
|
4 |
import logging
|
5 |
import sys
|
6 |
+
from .constants import BASE_URLS
|
7 |
+
from .exceptions import ServerException, MicrosoftAPIerror
|
|
|
8 |
from .base import BaseTranslator
|
9 |
|
10 |
|
11 |
+
class MicrosoftTranslator(BaseTranslator):
|
12 |
"""
|
13 |
the class that wraps functions, which use the Microsoft translator under the hood to translate word(s)
|
14 |
"""
|
15 |
|
|
|
|
|
|
|
16 |
def __init__(self, api_key=None, region=None, source=None, target=None, proxies=None, **kwargs):
|
17 |
"""
|
18 |
@params api_key and target are the required params
|
19 |
@param api_key: your Microsoft API key
|
20 |
@param region: your Microsoft Location
|
21 |
"""
|
22 |
+
|
23 |
+
MICROSOFT_CODES_TO_LANGUAGES = self._get_supported_languages()
|
24 |
+
|
25 |
if not api_key:
|
26 |
raise ServerException(401)
|
|
|
|
|
27 |
|
28 |
+
self.api_key = api_key
|
29 |
self.proxies = proxies
|
30 |
self.headers = {
|
31 |
"Ocp-Apim-Subscription-Key": self.api_key,
|
|
|
35 |
if region:
|
36 |
self.region = region
|
37 |
self.headers["Ocp-Apim-Subscription-Region"] = self.region
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
self.__base_url = BASE_URLS.get("MICROSOFT_TRANSLATE")
|
39 |
+
super().__init__(
|
40 |
+
source=source,
|
41 |
+
target=target,
|
42 |
+
languages=MICROSOFT_CODES_TO_LANGUAGES,
|
43 |
+
**kwargs
|
44 |
+
)
|
45 |
|
46 |
+
def _get_supported_languages(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
+
microsoft_languages_api_url = \
|
49 |
+
"https://api.cognitive.microsofttranslator.com/languages?api-version=3.0&scope=translation"
|
50 |
+
microsoft_languages_response = requests.get(microsoft_languages_api_url)
|
51 |
+
translation_dict = microsoft_languages_response.json()['translation']
|
52 |
+
|
53 |
+
return {translation_dict[k]['name'].lower(): k for k in translation_dict.keys()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
def translate(self, text, **kwargs):
|
56 |
"""
|
|
|
60 |
"""
|
61 |
# a body must be a list of dicts to process multiple texts;
|
62 |
# I have not added multiple text processing here since it is covered by the translate_batch method
|
63 |
+
|
64 |
+
self._url_params['from'] = self._source
|
65 |
+
self._url_params['to'] = self._target
|
66 |
+
|
67 |
valid_microsoft_json = [{'text': text}]
|
68 |
try:
|
69 |
requested = requests.post(self.__base_url,
|
70 |
+
params=self._url_params,
|
71 |
headers=self.headers,
|
72 |
json=valid_microsoft_json,
|
73 |
proxies=self.proxies)
|
|
|
91 |
@param path: path to file
|
92 |
@return: translated text
|
93 |
"""
|
94 |
+
return self._translate_file(path, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
def translate_batch(self, batch, **kwargs):
|
97 |
"""
|
|
|
99 |
@param batch: list of texts to translate
|
100 |
@return: list of translations
|
101 |
"""
|
102 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
|
|
|
deep_translator/mymemory.py
CHANGED
@@ -1,27 +1,20 @@
|
|
1 |
"""
|
2 |
mymemory translator API
|
3 |
"""
|
4 |
-
import
|
5 |
-
import
|
6 |
-
|
7 |
-
from .constants import BASE_URLS, GOOGLE_LANGUAGES_TO_CODES
|
8 |
-
from .exceptions import (NotValidPayload,
|
9 |
TranslationNotFound,
|
10 |
-
LanguageNotSupportedException,
|
11 |
RequestError,
|
12 |
TooManyRequests)
|
13 |
from .base import BaseTranslator
|
14 |
import requests
|
15 |
-
from time import sleep
|
16 |
|
17 |
|
18 |
class MyMemoryTranslator(BaseTranslator):
|
19 |
"""
|
20 |
class that uses the mymemory translator to translate texts
|
21 |
"""
|
22 |
-
_languages = GOOGLE_LANGUAGES_TO_CODES
|
23 |
-
supported_languages = list(_languages.keys())
|
24 |
-
|
25 |
def __init__(self, source="auto", target="en", proxies=None, **kwargs):
|
26 |
"""
|
27 |
@param source: source language to translate from
|
@@ -29,10 +22,6 @@ class MyMemoryTranslator(BaseTranslator):
|
|
29 |
"""
|
30 |
self.__base_url = BASE_URLS.get("MYMEMORY")
|
31 |
self.proxies = proxies
|
32 |
-
if self.is_language_supported(source, target):
|
33 |
-
self._source, self._target = self._map_language_to_code(source.lower(), target.lower())
|
34 |
-
self._source = self._source if self._source != 'auto' else 'Lao'
|
35 |
-
|
36 |
self.email = kwargs.get('email', None)
|
37 |
super(MyMemoryTranslator, self).__init__(base_url=self.__base_url,
|
38 |
source=self._source,
|
@@ -40,41 +29,6 @@ class MyMemoryTranslator(BaseTranslator):
|
|
40 |
payload_key='q',
|
41 |
langpair='{}|{}'.format(self._source, self._target))
|
42 |
|
43 |
-
@staticmethod
|
44 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
45 |
-
"""
|
46 |
-
return the supported languages by the mymemory translator
|
47 |
-
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
48 |
-
@return: list or dict
|
49 |
-
"""
|
50 |
-
return MyMemoryTranslator.supported_languages if not as_dict else MyMemoryTranslator._languages
|
51 |
-
|
52 |
-
def _map_language_to_code(self, *languages):
|
53 |
-
"""
|
54 |
-
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
55 |
-
@param languages: list of languages
|
56 |
-
@return: mapped value of the language or raise an exception if the language is not supported
|
57 |
-
"""
|
58 |
-
for language in languages:
|
59 |
-
if language in self._languages.values() or language == 'auto':
|
60 |
-
yield language
|
61 |
-
elif language in self._languages.keys():
|
62 |
-
yield self._languages[language]
|
63 |
-
else:
|
64 |
-
raise LanguageNotSupportedException(language)
|
65 |
-
|
66 |
-
def is_language_supported(self, *languages):
|
67 |
-
"""
|
68 |
-
check if the language is supported by the translator
|
69 |
-
@param languages: list of languages
|
70 |
-
@return: bool or raise an Exception
|
71 |
-
"""
|
72 |
-
for lang in languages:
|
73 |
-
if lang != 'auto' and lang not in self._languages.keys():
|
74 |
-
if lang != 'auto' and lang not in self._languages.values():
|
75 |
-
raise LanguageNotSupportedException(lang)
|
76 |
-
return True
|
77 |
-
|
78 |
def translate(self, text, return_all=False, **kwargs):
|
79 |
"""
|
80 |
function that uses the mymemory translator to translate a text
|
@@ -84,7 +38,10 @@ class MyMemoryTranslator(BaseTranslator):
|
|
84 |
@return: str or list
|
85 |
"""
|
86 |
|
87 |
-
if self.
|
|
|
|
|
|
|
88 |
text = text.strip()
|
89 |
|
90 |
if self.payload_key:
|
@@ -94,7 +51,6 @@ class MyMemoryTranslator(BaseTranslator):
|
|
94 |
|
95 |
response = requests.get(self.__base_url,
|
96 |
params=self._url_params,
|
97 |
-
headers=self.headers,
|
98 |
proxies=self.proxies)
|
99 |
|
100 |
if response.status_code == 429:
|
@@ -116,32 +72,6 @@ class MyMemoryTranslator(BaseTranslator):
|
|
116 |
next_match = next(matches)
|
117 |
return next_match if not return_all else list(all_matches)
|
118 |
|
119 |
-
def translate_sentences(self, sentences=None, **kwargs):
|
120 |
-
"""
|
121 |
-
translate many sentences together. This makes sense if you have sentences with different languages
|
122 |
-
and you want to translate all to unified language. This is handy because it detects
|
123 |
-
automatically the language of each sentence and then translate it.
|
124 |
-
|
125 |
-
@param sentences: list of sentences to translate
|
126 |
-
@return: list of all translated sentences
|
127 |
-
"""
|
128 |
-
warn_msg = "deprecated. Use the translate_batch function instead"
|
129 |
-
warnings.warn(warn_msg, DeprecationWarning, stacklevel=2)
|
130 |
-
logging.warning(warn_msg)
|
131 |
-
if not sentences:
|
132 |
-
raise NotValidPayload(sentences)
|
133 |
-
|
134 |
-
translated_sentences = []
|
135 |
-
try:
|
136 |
-
for sentence in sentences:
|
137 |
-
translated = self.translate(text=sentence, **kwargs)
|
138 |
-
translated_sentences.append(translated)
|
139 |
-
|
140 |
-
return translated_sentences
|
141 |
-
|
142 |
-
except Exception as e:
|
143 |
-
raise e
|
144 |
-
|
145 |
def translate_file(self, path, **kwargs):
|
146 |
"""
|
147 |
translate directly from file
|
@@ -150,13 +80,7 @@ class MyMemoryTranslator(BaseTranslator):
|
|
150 |
@param kwargs: additional args
|
151 |
@return: str
|
152 |
"""
|
153 |
-
|
154 |
-
with open(path, 'r', encoding='utf-8') as f:
|
155 |
-
text = f.read().strip()
|
156 |
-
|
157 |
-
return self.translate(text=text)
|
158 |
-
except Exception as e:
|
159 |
-
raise e
|
160 |
|
161 |
def translate_batch(self, batch=None, **kwargs):
|
162 |
"""
|
@@ -164,13 +88,4 @@ class MyMemoryTranslator(BaseTranslator):
|
|
164 |
@param batch: list of texts you want to translate
|
165 |
@return: list of translations
|
166 |
"""
|
167 |
-
|
168 |
-
raise Exception("Enter your text list that you want to translate")
|
169 |
-
|
170 |
-
arr = []
|
171 |
-
for text in batch:
|
172 |
-
translated = self.translate(text, **kwargs)
|
173 |
-
arr.append(translated)
|
174 |
-
sleep(2)
|
175 |
-
|
176 |
-
return arr
|
|
|
1 |
"""
|
2 |
mymemory translator API
|
3 |
"""
|
4 |
+
from validate import is_empty, validate_input
|
5 |
+
from .constants import BASE_URLS
|
6 |
+
from .exceptions import (
|
|
|
|
|
7 |
TranslationNotFound,
|
|
|
8 |
RequestError,
|
9 |
TooManyRequests)
|
10 |
from .base import BaseTranslator
|
11 |
import requests
|
|
|
12 |
|
13 |
|
14 |
class MyMemoryTranslator(BaseTranslator):
|
15 |
"""
|
16 |
class that uses the mymemory translator to translate texts
|
17 |
"""
|
|
|
|
|
|
|
18 |
def __init__(self, source="auto", target="en", proxies=None, **kwargs):
|
19 |
"""
|
20 |
@param source: source language to translate from
|
|
|
22 |
"""
|
23 |
self.__base_url = BASE_URLS.get("MYMEMORY")
|
24 |
self.proxies = proxies
|
|
|
|
|
|
|
|
|
25 |
self.email = kwargs.get('email', None)
|
26 |
super(MyMemoryTranslator, self).__init__(base_url=self.__base_url,
|
27 |
source=self._source,
|
|
|
29 |
payload_key='q',
|
30 |
langpair='{}|{}'.format(self._source, self._target))
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
def translate(self, text, return_all=False, **kwargs):
|
33 |
"""
|
34 |
function that uses the mymemory translator to translate a text
|
|
|
38 |
@return: str or list
|
39 |
"""
|
40 |
|
41 |
+
if self._same_source_target() or is_empty(text):
|
42 |
+
return text
|
43 |
+
|
44 |
+
if validate_input(text, max_chars=500):
|
45 |
text = text.strip()
|
46 |
|
47 |
if self.payload_key:
|
|
|
51 |
|
52 |
response = requests.get(self.__base_url,
|
53 |
params=self._url_params,
|
|
|
54 |
proxies=self.proxies)
|
55 |
|
56 |
if response.status_code == 429:
|
|
|
72 |
next_match = next(matches)
|
73 |
return next_match if not return_all else list(all_matches)
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
def translate_file(self, path, **kwargs):
|
76 |
"""
|
77 |
translate directly from file
|
|
|
80 |
@param kwargs: additional args
|
81 |
@return: str
|
82 |
"""
|
83 |
+
return self._translate_file(path, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
def translate_batch(self, batch=None, **kwargs):
|
86 |
"""
|
|
|
88 |
@param batch: list of texts you want to translate
|
89 |
@return: list of translations
|
90 |
"""
|
91 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
deep_translator/papago.py
CHANGED
@@ -10,12 +10,10 @@ import warnings
|
|
10 |
import logging
|
11 |
|
12 |
|
13 |
-
class PapagoTranslator(
|
14 |
"""
|
15 |
class that wraps functions, which use google translate under the hood to translate text(s)
|
16 |
"""
|
17 |
-
_languages = PAPAGO_LANGUAGE_TO_CODE
|
18 |
-
supported_languages = list(_languages.keys())
|
19 |
|
20 |
def __init__(self, client_id=None, secret_key=None, source="auto", target="en", **kwargs):
|
21 |
"""
|
@@ -29,44 +27,12 @@ class PapagoTranslator(object):
|
|
29 |
self.__base_url = BASE_URLS.get("PAPAGO_API")
|
30 |
self.client_id = client_id
|
31 |
self.secret_key = secret_key
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
"""
|
39 |
-
return the supported languages by the google translator
|
40 |
-
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
41 |
-
@return: list or dict
|
42 |
-
"""
|
43 |
-
return PapagoTranslator.supported_languages if not as_dict else PapagoTranslator._languages
|
44 |
-
|
45 |
-
def _map_language_to_code(self, *languages):
|
46 |
-
"""
|
47 |
-
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
48 |
-
@param languages: list of languages
|
49 |
-
@return: mapped value of the language or raise an exception if the language is not supported
|
50 |
-
"""
|
51 |
-
for language in languages:
|
52 |
-
if language in self._languages.values() or language == 'auto':
|
53 |
-
yield language
|
54 |
-
elif language in self._languages.keys():
|
55 |
-
yield self._languages[language]
|
56 |
-
else:
|
57 |
-
raise LanguageNotSupportedException(language)
|
58 |
-
|
59 |
-
def is_language_supported(self, *languages):
|
60 |
-
"""
|
61 |
-
check if the language is supported by the translator
|
62 |
-
@param languages: list of languages
|
63 |
-
@return: bool or raise an Exception
|
64 |
-
"""
|
65 |
-
for lang in languages:
|
66 |
-
if lang != 'auto' and lang not in self._languages.keys():
|
67 |
-
if lang != 'auto' and lang not in self._languages.values():
|
68 |
-
raise LanguageNotSupportedException(lang)
|
69 |
-
return True
|
70 |
|
71 |
def translate(self, text, **kwargs):
|
72 |
"""
|
@@ -109,38 +75,7 @@ class PapagoTranslator(object):
|
|
109 |
@param kwargs: additional args
|
110 |
@return: str
|
111 |
"""
|
112 |
-
|
113 |
-
with open(path, 'r', encoding='utf-8') as f:
|
114 |
-
text = f.read().strip()
|
115 |
-
return self.translate(text)
|
116 |
-
except Exception as e:
|
117 |
-
raise e
|
118 |
-
|
119 |
-
def translate_sentences(self, sentences=None, **kwargs):
|
120 |
-
"""
|
121 |
-
translate many sentences together. This makes sense if you have sentences with different languages
|
122 |
-
and you want to translate all to unified language. This is handy because it detects
|
123 |
-
automatically the language of each sentence and then translate it.
|
124 |
-
|
125 |
-
@param sentences: list of sentences to translate
|
126 |
-
@return: list of all translated sentences
|
127 |
-
"""
|
128 |
-
warnings.warn("deprecated. Use the translate_batch function instead",
|
129 |
-
DeprecationWarning, stacklevel=2)
|
130 |
-
logging.warning("deprecated. Use the translate_batch function instead")
|
131 |
-
if not sentences:
|
132 |
-
raise NotValidPayload(sentences)
|
133 |
-
|
134 |
-
translated_sentences = []
|
135 |
-
try:
|
136 |
-
for sentence in sentences:
|
137 |
-
translated = self.translate(text=sentence)
|
138 |
-
translated_sentences.append(translated)
|
139 |
-
|
140 |
-
return translated_sentences
|
141 |
-
|
142 |
-
except Exception as e:
|
143 |
-
raise e
|
144 |
|
145 |
def translate_batch(self, batch=None, **kwargs):
|
146 |
"""
|
@@ -148,14 +83,4 @@ class PapagoTranslator(object):
|
|
148 |
@param batch: list of texts you want to translate
|
149 |
@return: list of translations
|
150 |
"""
|
151 |
-
|
152 |
-
raise Exception("Enter your text list that you want to translate")
|
153 |
-
arr = []
|
154 |
-
for i, text in enumerate(batch):
|
155 |
-
|
156 |
-
translated = self.translate(text, **kwargs)
|
157 |
-
arr.append(translated)
|
158 |
-
return arr
|
159 |
-
|
160 |
-
|
161 |
-
BaseTranslator.register(PapagoTranslator)
|
|
|
10 |
import logging
|
11 |
|
12 |
|
13 |
+
class PapagoTranslator(BaseTranslator):
|
14 |
"""
|
15 |
class that wraps functions, which use google translate under the hood to translate text(s)
|
16 |
"""
|
|
|
|
|
17 |
|
18 |
def __init__(self, client_id=None, secret_key=None, source="auto", target="en", **kwargs):
|
19 |
"""
|
|
|
27 |
self.__base_url = BASE_URLS.get("PAPAGO_API")
|
28 |
self.client_id = client_id
|
29 |
self.secret_key = secret_key
|
30 |
+
super().__init__(
|
31 |
+
source=source,
|
32 |
+
target=target,
|
33 |
+
languages=PAPAGO_LANGUAGE_TO_CODE,
|
34 |
+
**kwargs
|
35 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
def translate(self, text, **kwargs):
|
38 |
"""
|
|
|
75 |
@param kwargs: additional args
|
76 |
@return: str
|
77 |
"""
|
78 |
+
return self._translate_file(path, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
def translate_batch(self, batch=None, **kwargs):
|
81 |
"""
|
|
|
83 |
@param batch: list of texts you want to translate
|
84 |
@return: list of translations
|
85 |
"""
|
86 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
deep_translator/pons.py
CHANGED
@@ -3,6 +3,8 @@ pons translator API
|
|
3 |
"""
|
4 |
from bs4 import BeautifulSoup
|
5 |
import requests
|
|
|
|
|
6 |
from .constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES
|
7 |
from .exceptions import (LanguageNotSupportedException,
|
8 |
TranslationNotFound,
|
@@ -18,8 +20,6 @@ class PonsTranslator(BaseTranslator):
|
|
18 |
"""
|
19 |
class that uses PONS translator to translate words
|
20 |
"""
|
21 |
-
_languages = PONS_LANGUAGES_TO_CODES
|
22 |
-
supported_languages = list(_languages.keys())
|
23 |
|
24 |
def __init__(self, source, target="en", proxies=None, **kwargs):
|
25 |
"""
|
@@ -28,10 +28,8 @@ class PonsTranslator(BaseTranslator):
|
|
28 |
"""
|
29 |
self.__base_url = BASE_URLS.get("PONS")
|
30 |
self.proxies = proxies
|
31 |
-
if self.is_language_supported(source, target):
|
32 |
-
self._source, self._target = self._map_language_to_code(source, target)
|
33 |
-
|
34 |
super().__init__(base_url=self.__base_url,
|
|
|
35 |
source=self._source,
|
36 |
target=self._target,
|
37 |
payload_key=None,
|
@@ -39,41 +37,6 @@ class PonsTranslator(BaseTranslator):
|
|
39 |
element_query={"class": "target"}
|
40 |
)
|
41 |
|
42 |
-
@staticmethod
|
43 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
44 |
-
"""
|
45 |
-
return the supported languages by the linguee translator
|
46 |
-
@param as_dict: if True, the languages will be returned as a dictionary mapping languages to their abbreviations
|
47 |
-
@return: list or dict
|
48 |
-
"""
|
49 |
-
return PonsTranslator.supported_languages if not as_dict else PonsTranslator._languages
|
50 |
-
|
51 |
-
def _map_language_to_code(self, *languages, **kwargs):
|
52 |
-
"""
|
53 |
-
map language to its corresponding code (abbreviation) if the language was passed by its full name by the user
|
54 |
-
@param languages: list of languages
|
55 |
-
@return: mapped value of the language or raise an exception if the language is not supported
|
56 |
-
"""
|
57 |
-
for language in languages:
|
58 |
-
if language in self._languages.values():
|
59 |
-
yield PONS_CODES_TO_LANGUAGES[language]
|
60 |
-
elif language in self._languages.keys():
|
61 |
-
yield language
|
62 |
-
else:
|
63 |
-
raise LanguageNotSupportedException(language)
|
64 |
-
|
65 |
-
def is_language_supported(self, *languages, **kwargs):
|
66 |
-
"""
|
67 |
-
check if the language is supported by the translator
|
68 |
-
@param languages: list of languages
|
69 |
-
@return: bool or raise an Exception
|
70 |
-
"""
|
71 |
-
for lang in languages:
|
72 |
-
if lang not in self._languages.keys():
|
73 |
-
if lang not in self._languages.values():
|
74 |
-
raise LanguageNotSupportedException(lang)
|
75 |
-
return True
|
76 |
-
|
77 |
def translate(self, word, return_all=False, **kwargs):
|
78 |
"""
|
79 |
function that uses PONS to translate a word
|
@@ -83,7 +46,10 @@ class PonsTranslator(BaseTranslator):
|
|
83 |
@type return_all: bool
|
84 |
@return: str: translated word
|
85 |
"""
|
86 |
-
if self.
|
|
|
|
|
|
|
87 |
url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, word)
|
88 |
url = requote_uri(url)
|
89 |
response = requests.get(url, proxies=self.proxies)
|
|
|
3 |
"""
|
4 |
from bs4 import BeautifulSoup
|
5 |
import requests
|
6 |
+
|
7 |
+
from validate import validate_input, is_empty
|
8 |
from .constants import BASE_URLS, PONS_LANGUAGES_TO_CODES, PONS_CODES_TO_LANGUAGES
|
9 |
from .exceptions import (LanguageNotSupportedException,
|
10 |
TranslationNotFound,
|
|
|
20 |
"""
|
21 |
class that uses PONS translator to translate words
|
22 |
"""
|
|
|
|
|
23 |
|
24 |
def __init__(self, source, target="en", proxies=None, **kwargs):
|
25 |
"""
|
|
|
28 |
"""
|
29 |
self.__base_url = BASE_URLS.get("PONS")
|
30 |
self.proxies = proxies
|
|
|
|
|
|
|
31 |
super().__init__(base_url=self.__base_url,
|
32 |
+
languages=PONS_LANGUAGES_TO_CODES,
|
33 |
source=self._source,
|
34 |
target=self._target,
|
35 |
payload_key=None,
|
|
|
37 |
element_query={"class": "target"}
|
38 |
)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
def translate(self, word, return_all=False, **kwargs):
|
41 |
"""
|
42 |
function that uses PONS to translate a word
|
|
|
46 |
@type return_all: bool
|
47 |
@return: str: translated word
|
48 |
"""
|
49 |
+
if self._same_source_target() or is_empty(word):
|
50 |
+
return word
|
51 |
+
|
52 |
+
if validate_input(word, max_chars=50):
|
53 |
url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, word)
|
54 |
url = requote_uri(url)
|
55 |
response = requests.get(url, proxies=self.proxies)
|
deep_translator/qcri.py
CHANGED
@@ -5,7 +5,7 @@ from .exceptions import (ServerException, TranslationNotFound)
|
|
5 |
from .base import BaseTranslator
|
6 |
|
7 |
|
8 |
-
class QCRI(
|
9 |
"""
|
10 |
class that wraps functions, which use the QRCI translator under the hood to translate word(s)
|
11 |
"""
|
@@ -18,8 +18,6 @@ class QCRI(object):
|
|
18 |
if not api_key:
|
19 |
raise ServerException(401)
|
20 |
self.__base_url = BASE_URLS.get("QCRI")
|
21 |
-
self.source = source
|
22 |
-
self.target = target
|
23 |
self.api_key = api_key
|
24 |
self.api_endpoints = {
|
25 |
"get_languages": "getLanguagePairs",
|
@@ -30,6 +28,12 @@ class QCRI(object):
|
|
30 |
self.params = {
|
31 |
"key": self.api_key
|
32 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
def _get(self, endpoint, params=None, return_text=True):
|
35 |
if not params:
|
@@ -41,14 +45,6 @@ class QCRI(object):
|
|
41 |
except Exception as e:
|
42 |
raise e
|
43 |
|
44 |
-
@staticmethod
|
45 |
-
def get_supported_languages(as_dict=False, **kwargs):
|
46 |
-
# Have no use for this as the format is not what we need
|
47 |
-
# Save this for whenever
|
48 |
-
# pairs = self._get("get_languages")
|
49 |
-
# Using a this one instead
|
50 |
-
return [*QCRI_LANGUAGE_TO_CODE.keys()] if not as_dict else QCRI_LANGUAGE_TO_CODE
|
51 |
-
|
52 |
@property
|
53 |
def languages(self):
|
54 |
return self.get_supported_languages()
|
@@ -61,11 +57,11 @@ class QCRI(object):
|
|
61 |
def domains(self):
|
62 |
return self.get_domains()
|
63 |
|
64 |
-
def translate(self, text,
|
65 |
params = {
|
66 |
"key": self.api_key,
|
67 |
-
"langpair": "{}-{}".format(self.
|
68 |
-
"domain": domain,
|
69 |
"text": text
|
70 |
}
|
71 |
try:
|
@@ -83,14 +79,14 @@ class QCRI(object):
|
|
83 |
raise TranslationNotFound(text)
|
84 |
return translation
|
85 |
|
86 |
-
def
|
|
|
|
|
|
|
87 |
"""
|
88 |
translate a batch of texts
|
89 |
@domain: domain
|
90 |
@param batch: list of texts to translate
|
91 |
@return: list of translations
|
92 |
"""
|
93 |
-
return
|
94 |
-
|
95 |
-
|
96 |
-
BaseTranslator.register(QCRI)
|
|
|
5 |
from .base import BaseTranslator
|
6 |
|
7 |
|
8 |
+
class QCRI(BaseTranslator):
|
9 |
"""
|
10 |
class that wraps functions, which use the QRCI translator under the hood to translate word(s)
|
11 |
"""
|
|
|
18 |
if not api_key:
|
19 |
raise ServerException(401)
|
20 |
self.__base_url = BASE_URLS.get("QCRI")
|
|
|
|
|
21 |
self.api_key = api_key
|
22 |
self.api_endpoints = {
|
23 |
"get_languages": "getLanguagePairs",
|
|
|
28 |
self.params = {
|
29 |
"key": self.api_key
|
30 |
}
|
31 |
+
super().__init__(
|
32 |
+
source=source,
|
33 |
+
target=target,
|
34 |
+
languages=QCRI_LANGUAGE_TO_CODE,
|
35 |
+
**kwargs
|
36 |
+
)
|
37 |
|
38 |
def _get(self, endpoint, params=None, return_text=True):
|
39 |
if not params:
|
|
|
45 |
except Exception as e:
|
46 |
raise e
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
@property
|
49 |
def languages(self):
|
50 |
return self.get_supported_languages()
|
|
|
57 |
def domains(self):
|
58 |
return self.get_domains()
|
59 |
|
60 |
+
def translate(self, text, **kwargs):
|
61 |
params = {
|
62 |
"key": self.api_key,
|
63 |
+
"langpair": "{}-{}".format(self._source, self._target),
|
64 |
+
"domain": kwargs["domain"],
|
65 |
"text": text
|
66 |
}
|
67 |
try:
|
|
|
79 |
raise TranslationNotFound(text)
|
80 |
return translation
|
81 |
|
82 |
+
def translate_file(self, path, **kwargs):
|
83 |
+
return self._translate_file(path, **kwargs)
|
84 |
+
|
85 |
+
def translate_batch(self, batch, **kwargs):
|
86 |
"""
|
87 |
translate a batch of texts
|
88 |
@domain: domain
|
89 |
@param batch: list of texts to translate
|
90 |
@return: list of translations
|
91 |
"""
|
92 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
|
|
|
deep_translator/validate.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
from .exceptions import NotValidPayload, NotValidLength
|
3 |
+
import string
|
4 |
+
|
5 |
+
|
6 |
+
def is_empty(text: str):
|
7 |
+
return text.strip() == ""
|
8 |
+
|
9 |
+
|
10 |
+
def validate_input(text: str, min_chars: int = 1, max_chars: int = 5000):
|
11 |
+
"""
|
12 |
+
validate the target text to translate
|
13 |
+
@param min_chars: min characters
|
14 |
+
@param max_chars: max characters
|
15 |
+
@param text: text to translate
|
16 |
+
@return: bool
|
17 |
+
"""
|
18 |
+
|
19 |
+
if not isinstance(text, str) or not text.strip() or text.isdigit():
|
20 |
+
raise NotValidPayload(text)
|
21 |
+
|
22 |
+
# check if payload contains only symbols
|
23 |
+
if all(i in string.punctuation for i in text):
|
24 |
+
raise NotValidPayload(text)
|
25 |
+
|
26 |
+
if not min_chars <= len(text) < max_chars:
|
27 |
+
raise NotValidLength(text, min_chars, max_chars)
|
28 |
+
|
29 |
+
return True
|
deep_translator/yandex.py
CHANGED
@@ -8,7 +8,7 @@ from .exceptions import (RequestError, ServerException,
|
|
8 |
from .base import BaseTranslator
|
9 |
|
10 |
|
11 |
-
class YandexTranslator(
|
12 |
"""
|
13 |
class that wraps functions, which use the yandex translator under the hood to translate word(s)
|
14 |
"""
|
@@ -20,9 +20,6 @@ class YandexTranslator(object):
|
|
20 |
if not api_key:
|
21 |
raise ServerException(401)
|
22 |
self.__base_url = BASE_URLS.get("YANDEX")
|
23 |
-
self.source = source
|
24 |
-
self.target = target
|
25 |
-
|
26 |
self.api_key = api_key
|
27 |
self.api_version = "v1.5"
|
28 |
self.api_endpoints = {
|
@@ -30,13 +27,11 @@ class YandexTranslator(object):
|
|
30 |
"detect": "detect",
|
31 |
"translate": "translate",
|
32 |
}
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
supported languages using the languages property or call _get_supported_languages
|
39 |
-
"""
|
40 |
|
41 |
def _get_supported_languages(self):
|
42 |
return set(x.split("-")[0] for x in self.dirs)
|
@@ -95,7 +90,7 @@ class YandexTranslator(object):
|
|
95 |
params = {
|
96 |
"text": text,
|
97 |
"format": "plain",
|
98 |
-
"lang": self.
|
99 |
"key": self.api_key
|
100 |
}
|
101 |
try:
|
@@ -124,13 +119,7 @@ class YandexTranslator(object):
|
|
124 |
@param path: path to file
|
125 |
@return: translated text
|
126 |
"""
|
127 |
-
|
128 |
-
with open(path, 'r', encoding='utf-8') as f:
|
129 |
-
text = f.read()
|
130 |
-
|
131 |
-
return self.translate(text)
|
132 |
-
except Exception as e:
|
133 |
-
raise e
|
134 |
|
135 |
def translate_batch(self, batch, **kwargs):
|
136 |
"""
|
@@ -138,7 +127,5 @@ class YandexTranslator(object):
|
|
138 |
@param batch: list of texts to translate
|
139 |
@return: list of translations
|
140 |
"""
|
141 |
-
return
|
142 |
-
|
143 |
|
144 |
-
BaseTranslator.register(YandexTranslator)
|
|
|
8 |
from .base import BaseTranslator
|
9 |
|
10 |
|
11 |
+
class YandexTranslator(BaseTranslator):
|
12 |
"""
|
13 |
class that wraps functions, which use the yandex translator under the hood to translate word(s)
|
14 |
"""
|
|
|
20 |
if not api_key:
|
21 |
raise ServerException(401)
|
22 |
self.__base_url = BASE_URLS.get("YANDEX")
|
|
|
|
|
|
|
23 |
self.api_key = api_key
|
24 |
self.api_version = "v1.5"
|
25 |
self.api_endpoints = {
|
|
|
27 |
"detect": "detect",
|
28 |
"translate": "translate",
|
29 |
}
|
30 |
+
super().__init__(
|
31 |
+
source=source,
|
32 |
+
target=target,
|
33 |
+
**kwargs
|
34 |
+
)
|
|
|
|
|
35 |
|
36 |
def _get_supported_languages(self):
|
37 |
return set(x.split("-")[0] for x in self.dirs)
|
|
|
90 |
params = {
|
91 |
"text": text,
|
92 |
"format": "plain",
|
93 |
+
"lang": self._target if self._source == "auto" else "{}-{}".format(self._source, self._target),
|
94 |
"key": self.api_key
|
95 |
}
|
96 |
try:
|
|
|
119 |
@param path: path to file
|
120 |
@return: translated text
|
121 |
"""
|
122 |
+
return self._translate_file(path, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
def translate_batch(self, batch, **kwargs):
|
125 |
"""
|
|
|
127 |
@param batch: list of texts to translate
|
128 |
@return: list of translations
|
129 |
"""
|
130 |
+
return self._translate_batch(batch, **kwargs)
|
|
|
131 |
|
|