= committed on
Commit
f06c14e
·
1 Parent(s): 00d0b51

added pons translator support

Browse files
deep_translator/constants.py CHANGED
@@ -1,7 +1,7 @@
1
 
2
  BASE_URLS = {
3
  "GOOGLE_TRANSLATE": "https://translate.google.com/m",
4
- "PONS": "https://en.pons.com/translate"
5
  }
6
 
7
  CODES_TO_LANGUAGES = {
 
1
 
2
  BASE_URLS = {
3
  "GOOGLE_TRANSLATE": "https://translate.google.com/m",
4
+ "PONS": "https://en.pons.com/translate/"
5
  }
6
 
7
  CODES_TO_LANGUAGES = {
deep_translator/deep_translator.py CHANGED
@@ -1,38 +1,45 @@
1
  """Main module."""
2
 
3
- try:
4
- from bs4 import BeautifulSoup
5
- except:
6
- raise Exception("Import Error")
7
 
 
8
  import requests
9
- from .models import BaseTranslator
10
- from .constants import BASE_URLS, LANGUAGES_TO_CODES
11
- from .exceptions import LanguageNotSupportedException, NotValidPayload
 
 
12
 
13
 
14
- class GoogleTranslator(BaseTranslator):
15
-
 
 
16
  def __init__(self, source="auto", target="en"):
 
 
 
 
17
  self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
18
- super(GoogleTranslator, self).__init__()
19
 
20
  if self._validate_languages([source.lower(), target.lower()]):
21
  self._source = self._map_language_to_code(source.lower())
22
  self._target = self._map_language_to_code(target.lower())
23
 
24
- def _validate_payload(self, payload):
25
- if not isinstance(payload, str):
26
- return False
27
- elif not payload:
28
- return False
29
- elif len(payload) > 5000:
30
- return False
31
- else:
32
- return True
33
 
34
  def _map_language_to_code(self, language):
 
35
 
 
 
 
36
  if language in LANGUAGES_TO_CODES.values() or language == 'auto':
37
  return language
38
  elif language in LANGUAGES_TO_CODES.keys():
@@ -40,34 +47,83 @@ class GoogleTranslator(BaseTranslator):
40
  else:
41
  raise LanguageNotSupportedException(language)
42
 
43
- def _validate_languages(self, languages):
44
- for lang in languages:
45
- if lang != 'auto' and lang not in LANGUAGES_TO_CODES.keys():
46
- if lang != 'auto' and lang not in LANGUAGES_TO_CODES.values():
47
- raise LanguageNotSupportedException(lang)
48
- return True
 
 
 
 
 
 
 
 
49
 
50
- def translate(self, payload):
 
 
51
 
52
- valid = self._validate_payload(payload)
53
- if not valid:
54
- raise NotValidPayload(payload)
 
 
 
55
 
56
- try:
57
- payload = payload.strip()
58
- params = {
59
- "hl": self._target,
60
- "sl": self._source,
61
- "q": payload
62
- }
63
 
64
- res = requests.get(self.__base_url, params=params)
65
- soup = BeautifulSoup(res.text, 'html.parser')
66
- res = soup.find("div", {"class": "t0"})
67
- return res.get_text(strip=True)
 
 
 
 
 
68
 
69
- except Exception as e:
70
- print(e.args)
71
- raise
72
 
 
 
 
 
 
 
 
 
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Main module."""
2
 
 
 
 
 
3
 
4
+ from bs4 import BeautifulSoup
5
  import requests
6
+ from models import BaseTranslator
7
+ from constants import BASE_URLS, LANGUAGES_TO_CODES, CODES_TO_LANGUAGES
8
+ from exceptions import LanguageNotSupportedException, NotValidPayload, ElementNotFoundInGetRequest, NotValidLength
9
+ from parent import ParentTranslator
10
+ import string
11
 
12
 
13
+ class GoogleTranslator(ParentTranslator):
14
+ """
15
+ class that uses google translate to translate texts
16
+ """
17
  def __init__(self, source="auto", target="en"):
18
+ """
19
+ @param source: source language to translate from
20
+ @param target: target language to translate to
21
+ """
22
  self.__base_url = BASE_URLS.get("GOOGLE_TRANSLATE")
 
23
 
24
  if self._validate_languages([source.lower(), target.lower()]):
25
  self._source = self._map_language_to_code(source.lower())
26
  self._target = self._map_language_to_code(target.lower())
27
 
28
+ super(GoogleTranslator, self).__init__(base_url=self.__base_url,
29
+ source=self._source,
30
+ target=self._target,
31
+ element_tag='div',
32
+ element_query={"class": "t0"},
33
+ hl=self._target,
34
+ sl=self._source,
35
+ q=None)
 
36
 
37
  def _map_language_to_code(self, language):
38
+ """
39
 
40
+ @param language: type of language
41
+ @return: mapped value of the language or raise an exception if the language is not supported
42
+ """
43
  if language in LANGUAGES_TO_CODES.values() or language == 'auto':
44
  return language
45
  elif language in LANGUAGES_TO_CODES.keys():
 
47
  else:
48
  raise LanguageNotSupportedException(language)
49
 
50
+ def translate(self, payload, payload_tag='q'):
51
+ return super().translate(payload, payload_tag)
52
+
53
+
54
class PonsTranslator(ParentTranslator):
    """
    class that uses PONS translator to translate words
    """

    def __init__(self, source="french", target="english"):
        """
        @param source: source language to translate from
        @param target: target language to translate to
        """
        self.__base_url = BASE_URLS.get("PONS")

        # _validate_languages either returns True or raises, so no branching is needed here
        self._validate_languages([source.lower(), target.lower()])
        self._source = self._map_language_to_code(source.lower())
        self._target = self._map_language_to_code(target.lower())

        super().__init__(base_url=self.__base_url,
                         source=self._source,
                         target=self._target,
                         element_tag='div',
                         element_query={"class": "target"})

    def _map_language_to_code(self, language):
        """
        map a language given as name or as code to its key in LANGUAGES_TO_CODES
        (the result is what gets inserted into the PONS url)

        @param language: key or value of LANGUAGES_TO_CODES
        @return: mapped value of the language or raise an exception if the language is not supported
        """
        if language in LANGUAGES_TO_CODES.values():
            return CODES_TO_LANGUAGES[language]
        if language in LANGUAGES_TO_CODES.keys():
            return language
        raise LanguageNotSupportedException(language)

    def _validate_languages(self, languages):
        """
        check that every language is a key or a value of LANGUAGES_TO_CODES
        ('auto' is not accepted here, unlike in the parent class)

        @param languages: languages to validate
        @return: True or raise an exception
        """
        for lang in languages:
            known = lang in LANGUAGES_TO_CODES.keys() or lang in LANGUAGES_TO_CODES.values()
            if not known:
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, payload, payload_tag=None):
        """
        request the PONS page for the payload and collect the translations found in it

        @param payload: word to translate, it is url-quoted and appended to the request url
        @param payload_tag: not used by this translator, only kept for a uniform signature
        @return: list of str, one entry per matching element that contained at least one
                 translation link (every collected link text is followed by a space)
        """
        from requests.utils import quote

        url = "{}{}-{}/{}".format(self.__base_url, self._source, self._target, quote(payload))
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # only links whose href contains the target-source path are collected
        wanted_path = "/translate/{}-{}/".format(self._target, self._source)

        collected = []
        for element in soup.find_all(self._element_tag, self._element_query):
            words = ''
            for link in element.find_all('a'):
                if link.parent.name != 'div':
                    continue
                # an <a> tag without href yields None, which must not be used with `in`
                href = link.get('href') or ''
                if wanted_path in href:
                    words += link.get_text() + ' '
            collected.append(words)

        # drop the entries in which nothing (or only an empty link text) was collected
        return [entry for entry in collected if len(entry) > 1]
118
+
119
+
120
+ if __name__ == '__main__':
121
+ # res = GoogleTranslator(source='auto', target='french').translate(payload="A paragraph is a series of related sentences developing a central idea, called the topic. Try to think about paragraphs in terms of thematic unity: a paragraph is a sentence or a group of sentences that supports one central, unified idea. Paragraphs add one idea at a time to your broader argument.")
122
+ # res = GoogleTranslator(source='auto', target='french').translate_text(path='../examples/test.txt')
123
+ # res = GoogleTranslator(source='auto', target='french').translate_sentences([
124
+ # "this is good",
125
+ # "das Wetter ist schön",
126
+ # "un verme verde in un bicchiere verde"
127
+ # ])
128
+ res = PonsTranslator(source="english", target="arabic").translate(payload='good')
129
+ print(res)
deep_translator/exceptions.py CHANGED
@@ -20,3 +20,14 @@ class NotValidPayload(BaseError):
20
  message='payload must be a valid text with maximum 5000 character, otherwise it cannot be translated'):
21
  super(NotValidPayload, self).__init__(val, message)
22
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  message='payload must be a valid text with maximum 5000 character, otherwise it cannot be translated'):
21
  super(NotValidPayload, self).__init__(val, message)
22
 
23
+
24
class ElementNotFoundInGetRequest(BaseError):
    """
    exception signalling that a searched element is missing in the result of a get request
    """

    def __init__(self, val, message='Element was not found in the get request.'):
        """
        @param val: value forwarded to BaseError together with the message
        @param message: description of the error
        """
        super().__init__(val, message)
29
+
30
+
31
class NotValidLength(BaseError):
    """
    exception signalling that the length of a payload is outside of the allowed range
    """

    def __init__(self, val, message="Length of payload need to be between 0 and 5000"):
        """
        @param val: value forwarded to BaseError together with the message
        @param message: description of the error
        """
        super().__init__(val, message)
deep_translator/models.py CHANGED
@@ -10,7 +10,7 @@ class BaseTranslator(ABC):
10
  pass
11
 
12
  @abstractmethod
13
- def translate(self, payload):
14
  pass
15
 
16
 
 
10
  pass
11
 
12
  @abstractmethod
13
+ def translate(self, payload, payload_tag):
14
  pass
15
 
16
 
deep_translator/parent.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main module."""
2
+
3
+
4
+ from bs4 import BeautifulSoup
5
+ import requests
6
+ from models import BaseTranslator
7
+ from constants import LANGUAGES_TO_CODES
8
+ from exceptions import LanguageNotSupportedException, NotValidPayload, ElementNotFoundInGetRequest, NotValidLength
9
+
10
+
11
class ParentTranslator(BaseTranslator):
    """
    class that serves as a parent class for the different translators, it holds the
    validation helpers and the request/parsing logic the translators have in common
    """

    def __init__(self,
                 base_url=None,
                 source="auto",
                 target="en",
                 element_tag=None,
                 element_query=None,
                 **url_params):
        """
        @param base_url: url the get request is sent to
        @param source: source language to translate from
        @param target: target language to translate to
        @param element_tag: name of the html tag that is searched in the response
        @param element_query: attributes used together with element_tag for the search
        @param url_params: remaining keyword arguments, sent as query parameters of the request
        """
        self.__base_url = base_url
        self._source = source
        self._target = target
        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        super().__init__()

    def _validate_payload(self, payload):
        """
        validate the payload text to translate
        @param payload: text to translate
        @return: bool, True only for a non-empty str
        """
        return isinstance(payload, str) and bool(payload)

    def _check_length(self, payload, min_chars=0, max_chars=5000):
        """
        check that the length of the payload is inside of the allowed range
        @param payload: text to translate
        @param min_chars: exclusive lower bound of the length
        @param max_chars: inclusive upper bound of the length
        @return: bool
        """
        # max_chars itself is allowed: the error messages announce a maximum of 5000 characters
        return min_chars < len(payload) <= max_chars

    def _validate_languages(self, languages):
        """
        check that every language is 'auto' or a key or a value of LANGUAGES_TO_CODES
        @param languages: languages to validate
        @return: True or raise an exception
        """
        for lang in languages:
            if lang == 'auto':
                continue
            if lang not in LANGUAGES_TO_CODES.keys() and lang not in LANGUAGES_TO_CODES.values():
                raise LanguageNotSupportedException(lang)
        return True

    def translate(self, payload, payload_tag=None):
        """
        send the payload to the base url and return the text of the configured element
        @param payload: desired text to translate
        @param payload_tag: tag of the payload in the url parameters, the payload is only
                            sent when this tag was given as url parameter to the constructor
        @return: str: translated text
        """
        if not self._validate_payload(payload):
            raise NotValidPayload(payload)

        if not self._check_length(payload):
            raise NotValidLength(payload)

        payload = payload.strip()

        if payload_tag in self._url_params:
            self._url_params[payload_tag] = payload

        res = requests.get(self.__base_url, params=self._url_params)
        soup = BeautifulSoup(res.text, 'html.parser')
        element = soup.find(self._element_tag, self._element_query)
        if not element:
            raise ElementNotFoundInGetRequest(element)

        return element.get_text(strip=True)

    def translate_file(self, path):
        """
        translate the whole content of a text file
        @param path: path of the file to read
        @return: result of translate() for the content of the file
        """
        with open(path) as f:
            text = f.read()

        return self.translate(payload=text)

    def translate_sentences(self, sentences=None):
        """
        translate many sentences together, every sentence is passed to translate() on its own

        @param sentences: list of sentences to translate
        @return: list of all translated sentences
        """
        if not sentences:
            # NOTE(review): the visible NotValidPayload constructor forwards a `val`
            # argument, so the rejected value is passed here - confirm against exceptions.py
            raise NotValidPayload(sentences)

        return [self.translate(payload=sentence) for sentence in sentences]