|
|
|
|
|
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri" |
|
|
|
import logging |
|
import os |
|
import sys |
|
from typing import List, Optional |
|
|
|
import requests |
|
|
|
from deep_translator.base import BaseTranslator |
|
from deep_translator.constants import BASE_URLS, MSFT_ENV_VAR |
|
from deep_translator.exceptions import ( |
|
ApiKeyException, |
|
MicrosoftAPIerror, |
|
TranslationNotFound, |
|
) |
|
from deep_translator.validate import is_input_valid |
|
|
|
|
|
class MicrosoftTranslator(BaseTranslator):
    """
    Translator that wraps the Microsoft Translator REST API.

    The table of supported languages is downloaded from the Microsoft
    ``/languages`` endpoint every time an instance is created.
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        api_key: Optional[str] = os.getenv(MSFT_ENV_VAR, None),
        region: Optional[str] = None,
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @param source: forwarded to BaseTranslator as ``source``
        @param target: forwarded to BaseTranslator as ``target``
        @param api_key: your Microsoft API key; when empty, the environment
            variable named by MSFT_ENV_VAR is read instead
        @param region: your Microsoft Location; when given it is sent in the
            ``Ocp-Apim-Subscription-Region`` header
        @param proxies: proxies mapping handed to ``requests``
        @param kwargs: extra keyword arguments forwarded to BaseTranslator
        @raise ApiKeyException: if no API key was passed and none is found
            in the environment
        """
        # The default in the signature is evaluated once, at import time.
        # Look the variable up again so a key exported after the import is
        # still honoured.
        if not api_key:
            api_key = os.getenv(MSFT_ENV_VAR)
        if not api_key:
            raise ApiKeyException(env_var=MSFT_ENV_VAR)

        self.api_key = api_key
        self.proxies = proxies
        # Always define the attribute so that reading it never raises
        # AttributeError when no region was supplied.
        self.region = region
        self.headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-type": "application/json",
        }
        if region:
            self.headers["Ocp-Apim-Subscription-Region"] = region

        # self.proxies must be set before this call: the language lookup
        # below reads it.
        super().__init__(
            base_url=BASE_URLS.get("MICROSOFT_TRANSLATE"),
            source=source,
            target=target,
            languages=self._get_supported_languages(),
            **kwargs,
        )

    def _get_supported_languages(self):
        """
        Download the translation languages offered by the Microsoft API.

        @return: dict mapping the lower-cased language name to the
            lower-cased language code
        """
        microsoft_languages_api_url = (
            "https://api.cognitive.microsofttranslator.com/languages"
            "?api-version=3.0&scope=translation"
        )
        # Use the same proxies as translate() so the lookup also works when
        # the API is only reachable through a proxy.
        microsoft_languages_response = requests.get(
            microsoft_languages_api_url, proxies=self.proxies
        )
        translation_dict = microsoft_languages_response.json()["translation"]

        return {
            details["name"].lower(): code.lower()
            for code, details in translation_dict.items()
        }

    def translate(self, text: str, **kwargs) -> str:
        """
        function that uses microsoft translate to translate a text
        @param text: desired text to translate
        @return: str: translated text; several translations are joined
            with newlines
        @raise TranslationNotFound: if the input is rejected, the HTTP
            request fails, or the response has an unexpected shape
        @raise MicrosoftAPIerror: if the response body is a JSON object,
            which this method treats as an API error
        """
        if not is_input_valid(text):
            raise TranslationNotFound(text)

        self._url_params["from"] = self._source
        self._url_params["to"] = self._target

        # The request body is a JSON list holding a single {"text": ...}
        # object.
        valid_microsoft_json = [{"text": text}]
        try:
            response = requests.post(
                self._base_url,
                params=self._url_params,
                headers=self.headers,
                json=valid_microsoft_json,
                proxies=self.proxies,
            )
        except requests.exceptions.RequestException as exc:
            logging.warning(f"Returned error: {type(exc).__name__}")
            raise TranslationNotFound(text) from exc

        # Parse the body once instead of on every check.
        payload = response.json()

        # A dict payload is handled as an error report.
        if isinstance(payload, dict):
            raise MicrosoftAPIerror(payload["error"])

        # A list payload carries the translations in its first element.
        if isinstance(payload, list):
            all_translations = [
                item["text"] for item in payload[0]["translations"]
            ]
            return "\n".join(all_translations)

        # Any other payload type cannot be turned into the promised str.
        raise TranslationNotFound(text)

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate from a file
        @param path: path to file
        @param kwargs: forwarded unchanged to ``_translate_file``
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @param kwargs: forwarded unchanged to ``_translate_batch``
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)
|
|