harry committed on
Commit
26a1783
·
1 Parent(s): 5d13bbd

add baidu translator

Browse files
deep_translator/__init__.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  __copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
4
 
 
5
  from deep_translator.chatgpt import ChatGptTranslator
6
  from deep_translator.deepl import DeeplTranslator
7
  from deep_translator.detection import batch_detection, single_detection
@@ -31,6 +32,7 @@ __all__ = [
31
  "LibreTranslator",
32
  "PapagoTranslator",
33
  "ChatGptTranslator",
 
34
  "single_detection",
35
  "batch_detection",
36
  ]
 
2
 
3
  __copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
4
 
5
+ from deep_translator.baidu import BaiduTranslator
6
  from deep_translator.chatgpt import ChatGptTranslator
7
  from deep_translator.deepl import DeeplTranslator
8
  from deep_translator.detection import batch_detection, single_detection
 
32
  "LibreTranslator",
33
  "PapagoTranslator",
34
  "ChatGptTranslator",
35
+ "BaiduTranslator",
36
  "single_detection",
37
  "batch_detection",
38
  ]
deep_translator/baidu.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ baidu translator API
3
+ """
4
+
5
+ __copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
6
+
7
import hashlib
import os
import random
from typing import List, Optional

import requests

from deep_translator.base import BaseTranslator
from deep_translator.constants import (
    BAIDU_ID_VAR,
    BAIDU_KEY_VAR,
    BAIDU_LANGUAGE_TO_CODE,
    BASE_URLS,
)
from deep_translator.exceptions import (
    ApiKeyException,
    BaiduAPIerror,
    ServerException,
    TencentAPIerror,
    TranslationNotFound,
)
from deep_translator.validate import is_empty, is_input_valid
28
+
29
+
30
class BaiduTranslator(BaseTranslator):
    """
    class that wraps functions, which use the Baidu translation API
    under the hood to translate word(s)
    """

    def __init__(
        self,
        source: str = "en",
        target: str = "zh",
        secret_id: Optional[str] = os.getenv(BAIDU_ID_VAR, None),
        secret_key: Optional[str] = os.getenv(BAIDU_KEY_VAR, None),
        **kwargs
    ):
        """
        @param secret_id: your baidu cloud api secret id.
            Get one here: https://fanyi-api.baidu.com/choose
        @param secret_key: your baidu cloud api secret key.
        @param source: source language
        @param target: target language
        @raise ApiKeyException: if secret_id or secret_key is missing/empty
        """
        # NOTE: the os.getenv defaults above are evaluated once, when this
        # module is imported; environment variables set afterwards must be
        # passed explicitly.
        if not secret_id:
            raise ApiKeyException(env_var=BAIDU_ID_VAR)

        if not secret_key:
            raise ApiKeyException(env_var=BAIDU_KEY_VAR)

        self.secret_id = secret_id
        self.secret_key = secret_key
        super().__init__(
            base_url=BASE_URLS.get("BAIDU"),
            source=source,
            target=target,
            languages=BAIDU_LANGUAGE_TO_CODE,
            **kwargs
        )

    def translate(self, text: str, **kwargs) -> str:
        """
        @param text: text to translate
        @return: translated text; the input is returned unchanged when it is
            empty or when source and target are the same
        @raise ServerException: if the connection fails (503) or the HTTP
            status is not 200
        @raise BaiduAPIerror: if the response carries an "error_code"
        @raise TranslationNotFound: if the response is empty or holds no
            translated segment
        """
        if is_input_valid(text):
            if self._same_source_target() or is_empty(text):
                return text

            # Create the request parameters. The signature is the MD5 hex
            # digest of appid + query + salt + secret key.
            salt = random.randint(32768, 65536)
            sign = hashlib.md5(
                (self.secret_id + text + str(salt) + self.secret_key).encode(
                    "utf-8"
                )
            ).hexdigest()
            headers = {"Content-Type": "application/x-www-form-urlencoded"}
            payload = {
                "appid": self.secret_id,
                "q": text,
                "from": self.source,
                "to": self.target,
                "salt": salt,
                "sign": sign,
            }

            # Do the request and check the connection.
            # requests raises its own ConnectionError, which does not derive
            # from the builtin one, so both have to be listed.
            try:
                response = requests.post(
                    self._base_url, params=payload, headers=headers
                )
            except (ConnectionError, requests.exceptions.ConnectionError) as err:
                raise ServerException(503) from err
            if response.status_code != 200:
                raise ServerException(response.status_code)

            # Get the response and check is not empty.
            res = response.json()
            if not res:
                raise TranslationNotFound(text)

            # API-level failures are reported in the JSON body.
            if "error_code" in res:
                raise BaiduAPIerror(res.get("error_msg", res["error_code"]))

            # "trans_result" is a list of {"src": ..., "dst": ...} segments;
            # join the translated parts back together.
            segments = res.get("trans_result")
            if not segments:
                raise TranslationNotFound(text)
            return "\n".join(segment["dst"] for segment in segments)

    def translate_file(self, path: str, **kwargs) -> str:
        """
        @param path: path to the file whose content should be translated
        @return: whatever the inherited _translate_file helper returns
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
120
+
121
+
122
if __name__ == "__main__":
    # Manual smoke test; replace the placeholder credentials with real ones.
    # The sample sentence is German, so the source language is set
    # explicitly instead of relying on the "en" default.
    d = BaiduTranslator(
        source="de",
        target="zh",
        secret_id="some-id",
        secret_key="some-key",
    )
    t = d.translate("Ich habe keine ahnung")
    print("text: ", t)
deep_translator/constants.py CHANGED
@@ -7,6 +7,8 @@ LIBRE_ENV_VAR = "LIBRE_API_KEY"
7
  MSFT_ENV_VAR = "MICROSOFT_API_KEY"
8
  QCRI_ENV_VAR = "QCRI_API_KEY"
9
  YANDEX_ENV_VAR = "YANDEX_API_KEY"
 
 
10
 
11
 
12
  BASE_URLS = {
@@ -23,6 +25,7 @@ BASE_URLS = {
23
  "PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt",
24
  "LIBRE": "https://libretranslate.com/",
25
  "LIBRE_FREE": "https://libretranslate.de/",
 
26
  }
27
 
28
  GOOGLE_LANGUAGES_TO_CODES = {
@@ -280,3 +283,23 @@ LIBRE_LANGUAGES_TO_CODES = {
280
  "Turkish": "tr",
281
  "Vietnamese": "vi",
282
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  MSFT_ENV_VAR = "MICROSOFT_API_KEY"
8
  QCRI_ENV_VAR = "QCRI_API_KEY"
9
  YANDEX_ENV_VAR = "YANDEX_API_KEY"
10
+ BAIDU_ID_VAR = "BAIDU_ID"
11
+ BAIDU_KEY_VAR = "BAIDU_KEY"
12
 
13
 
14
  BASE_URLS = {
 
25
  "PAPAGO_API": "https://openapi.naver.com/v1/papago/n2mt",
26
  "LIBRE": "https://libretranslate.com/",
27
  "LIBRE_FREE": "https://libretranslate.de/",
28
+ "BAIDU": "https://fanyi-api.baidu.com/api/trans/vip/translate",
29
  }
30
 
31
  GOOGLE_LANGUAGES_TO_CODES = {
 
283
  "Turkish": "tr",
284
  "Vietnamese": "vi",
285
  }
286
+
287
# Language name -> language code sent to the Baidu translation API in the
# "from"/"to" request fields. Baidu uses its own codes, which differ from the
# ISO 639-1 codes for several languages (e.g. "fra", "jp", "kor", "cht").
BAIDU_LANGUAGE_TO_CODE = {
    "arabic": "ara",
    "chinese (simplified)": "zh",
    "chinese (traditional)": "cht",
    "english": "en",
    "french": "fra",
    "german": "de",
    # NOTE(review): hindi, indonesian, malay and turkish are not part of
    # Baidu's common-language list; their codes are kept as they were and
    # should be verified against the official language table.
    "hindi": "hi",
    "indonesian": "id",
    "japanese": "jp",
    "korean": "kor",
    "malay": "ms",
    "portuguese": "pt",
    "russian": "ru",
    "spanish": "spa",
    "thai": "th",
    "turkish": "tr",
    "vietnamese": "vie",
}
deep_translator/exceptions.py CHANGED
@@ -180,3 +180,16 @@ class AuthorizationException(Exception):
180
  def __init__(self, api_key, *args):
181
  msg = "Unauthorized access with the api key " + api_key
182
  super().__init__(msg, *args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def __init__(self, api_key, *args):
181
  msg = "Unauthorized access with the api key " + api_key
182
  super().__init__(msg, *args)
183
+
184
+
185
class BaiduAPIerror(Exception):
    """
    exception thrown if Baidu API returns one of its errors
    """

    def __init__(self, api_message):
        """
        @param api_message: error description returned by the Baidu API;
            it is stored as a string
        """
        self.api_message = str(api_message)
        self.message = "Baidu API returned the following error"
        # Forward the message to Exception so that `args` is populated.
        # With empty `args`, copying or unpickling the exception re-invokes
        # __init__ without its required argument and fails.
        super().__init__(self.api_message)

    def __str__(self):
        return "{}: {}".format(self.message, self.api_message)
tests/test_baidu.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from unittest.mock import Mock, patch
2
+
3
+ import pytest
4
+
5
+ from deep_translator import BaiduTranslator
6
+ from deep_translator.exceptions import AuthorizationException
7
+
8
+
9
@patch("deep_translator.baidu.requests")
def test_simple_translation(mock_requests):
    """A successful API reply is reduced to the translated text."""
    translator = BaiduTranslator(
        secret_id="this-is-an-valid-api-id",
        source="en",
        target="zh",
        secret_key="this-is-an-valid-api-key",
    )
    # Set the request response mock, shaped like a Baidu API reply:
    # "trans_result" is a list of source/translation segments.
    mock_response = Mock()
    mock_response.status_code = 200
    mock_response.json.return_value = {
        "from": "en",
        "to": "zh",
        "trans_result": [{"src": "hello", "dst": "你好"}],
    }
    # BaiduTranslator.translate sends the request with requests.post,
    # so that is the call which has to be stubbed.
    mock_requests.post.return_value = mock_response
    translation = translator.translate("hello")
    assert translation == "你好"
24
+
25
+
26
@patch("deep_translator.baidu.requests.post")
def test_wrong_api_key(mock_requests):
    """A non-200 HTTP status is surfaced as a ServerException."""
    # Imported locally: BaiduTranslator.translate raises ServerException for
    # non-200 replies, not the AuthorizationException imported at the top of
    # this test module.
    from deep_translator.exceptions import ServerException

    translator = BaiduTranslator(
        secret_id="this-is-a-wrong-api-id",
        source="en",
        target="zh",
        secret_key="this-is-a-wrong-api-key",
    )
    # Set the response status_code only.
    mock_requests.return_value = Mock(status_code=403)
    with pytest.raises(ServerException):
        translator.translate("Hello")