File size: 4,888 Bytes
8d368b7 fa90b1d 8d368b7 0d89ff1 8d368b7 7a05cd7 78d0ff1 0d89ff1 81548de 76aa3b2 8d368b7 2bbc526 8d368b7 78d0ff1 7ade3be 0d89ff1 7ade3be 78d0ff1 8d368b7 2bbc526 8d368b7 0d89ff1 c9c6f12 2bbc526 c9c6f12 8d368b7 80f6aa1 8d368b7 2bbc526 f89616a 2bbc526 219958c 78d0ff1 2bbc526 8d368b7 219958c 2bbc526 283a7d3 cb15d3a 78d0ff1 2bbc526 283a7d3 8d368b7 7ade3be 8d368b7 219958c 76aa3b2 78d0ff1 70f6ed6 78d0ff1 70f6ed6 219958c 78d0ff1 70f6ed6 81548de 70f6ed6 219958c 70f6ed6 219958c 78d0ff1 219958c 78d0ff1 70f6ed6 8d368b7 7ade3be 8d368b7 2bbc526 8d368b7 7ade3be 8d368b7 2bbc526 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# -*- coding: utf-8 -*-
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"
import logging
import os
import sys
from typing import List, Optional
import requests
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, MSFT_ENV_VAR
from deep_translator.exceptions import (
ApiKeyException,
MicrosoftAPIerror,
TranslationNotFound,
)
from deep_translator.validate import is_input_valid
class MicrosoftTranslator(BaseTranslator):
    """
    the class that wraps functions, which use the Microsoft translator under the hood to translate word(s)

    Creating an instance performs one HTTP request, because the supported
    languages are fetched from the Microsoft "languages" endpoint and handed
    to the base class constructor.
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        api_key: Optional[str] = os.getenv(MSFT_ENV_VAR, None),
        region: Optional[str] = None,
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @params api_key and target are the required params
        @param source: source language, forwarded to the base class
        @param target: target language, forwarded to the base class
        @param api_key: your Microsoft API key; when omitted, the value of the
            environment variable named by MSFT_ENV_VAR is used
        @param region: your Microsoft Location (optional)
        @param proxies: proxies mapping handed to every `requests` call
        @raise ApiKeyException: if no API key is given and the environment
            variable is not set
        """
        # The signature default is evaluated once, when the module is imported.
        # Reading the environment again here lets a key that was exported
        # after the import still be picked up.
        api_key = api_key or os.getenv(MSFT_ENV_VAR)
        if not api_key:
            raise ApiKeyException(env_var=MSFT_ENV_VAR)

        self.api_key = api_key
        self.proxies = proxies
        # always defined, so reading `self.region` never raises AttributeError
        self.region = region
        self.headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-type": "application/json",
        }
        # parameter region is not required but very common and goes to headers if passed
        if region:
            self.headers["Ocp-Apim-Subscription-Region"] = self.region

        # NOTE: self.proxies must be assigned before this call, because
        # _get_supported_languages() reads it.
        super().__init__(
            base_url=BASE_URLS.get("MICROSOFT_TRANSLATE"),
            source=source,
            target=target,
            languages=self._get_supported_languages(),
            **kwargs,
        )

    def _get_supported_languages(self) -> dict:
        """
        fetch the languages currently supported by the msft translator

        a common variable used in the other translators would be: MICROSOFT_CODES_TO_LANGUAGES

        @return: dict mapping the lower-cased language name to the
            lower-cased language abbreviation, e.g. {"german": "de"}
        """
        # The query is passed through `params` so that requests builds the
        # query string itself; the previous hand-written URL ended with a
        # stray space inside the `scope` value.
        microsoft_languages_response = requests.get(
            "https://api.cognitive.microsofttranslator.com/languages",
            params={"api-version": "3.0", "scope": "translation"},
            proxies=self.proxies,
        )
        translation_dict = microsoft_languages_response.json()["translation"]
        return {
            details["name"].lower(): code.lower()
            for code, details in translation_dict.items()
        }

    def translate(self, text: str, **kwargs) -> str:
        """
        function that uses microsoft translate to translate a text
        @param text: desired text to translate
        @return: str: translated text (several translations of the same text
            are joined with a newline)
        @raise TranslationNotFound: if the request itself fails, or the
            response holds no translation
        @raise MicrosoftAPIerror: if the API answers with an error object
        """
        if not is_input_valid(text):
            # Unchanged from the previous flow: when the validator reports the
            # input as invalid (without raising), no request is sent and the
            # result is None.
            return None

        self._url_params["from"] = self._source
        self._url_params["to"] = self._target

        # a body must be a list of dicts to process multiple texts;
        # multiple text processing is not done here since it is covered by the translate_batch method
        valid_microsoft_json = [{"text": text}]

        try:
            response = requests.post(
                self._base_url,
                params=self._url_params,
                headers=self.headers,
                json=valid_microsoft_json,
                proxies=self.proxies,
            )
        except requests.exceptions.RequestException as exc:
            logging.warning("Returned error: %s", type(exc).__name__)
            # keep the original exception type for callers, but chain the
            # cause so the network failure is not lost
            raise TranslationNotFound(text) from exc

        # decode the body once instead of on every access
        payload = response.json()

        # Where Microsoft API responds with an api error, it returns a dict in response.json()
        if isinstance(payload, dict):
            raise MicrosoftAPIerror(payload.get("error", payload))

        # Where it responds with a translation, its response.json() is a list
        # e.g. [{'translations': [{'text':'Hello world!', 'to': 'en'}]}]
        if isinstance(payload, list) and payload:
            return "\n".join(
                item["text"] for item in payload[0]["translations"]
            )

        # empty list or an unexpected JSON type: there is nothing to return
        raise TranslationNotFound(text)

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate from a file
        @param path: path to file
        @return: translated text
        """
        # delegates to `_translate_file`, which is not defined in this class
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @return: list of translations
        """
        # delegates to `_translate_batch`, which is not defined in this class
        return self._translate_batch(batch, **kwargs)
|