File size: 4,888 Bytes
8d368b7
 
fa90b1d
 
8d368b7
0d89ff1
8d368b7
7a05cd7
 
 
 
78d0ff1
0d89ff1
81548de
 
 
 
 
76aa3b2
8d368b7
 
2bbc526
8d368b7
 
 
 
78d0ff1
 
7ade3be
 
0d89ff1
 
7ade3be
78d0ff1
 
8d368b7
 
 
 
 
2bbc526
8d368b7
0d89ff1
c9c6f12
2bbc526
c9c6f12
8d368b7
 
 
 
80f6aa1
8d368b7
 
 
2bbc526
f89616a
2bbc526
 
219958c
78d0ff1
2bbc526
8d368b7
219958c
 
 
2bbc526
283a7d3
 
 
 
cb15d3a
 
 
78d0ff1
2bbc526
283a7d3
 
 
 
8d368b7
7ade3be
8d368b7
 
 
 
 
 
 
219958c
76aa3b2
78d0ff1
 
70f6ed6
78d0ff1
70f6ed6
219958c
78d0ff1
 
 
 
 
 
70f6ed6
 
 
 
81548de
 
 
70f6ed6
219958c
 
70f6ed6
219958c
 
 
78d0ff1
219958c
78d0ff1
70f6ed6
8d368b7
7ade3be
8d368b7
 
 
 
 
2bbc526
8d368b7
7ade3be
8d368b7
 
 
 
 
2bbc526
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# -*- coding: utf-8 -*-

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

import logging
import os
import sys
from typing import List, Optional

import requests

from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS, MSFT_ENV_VAR
from deep_translator.exceptions import (
    ApiKeyException,
    MicrosoftAPIerror,
    TranslationNotFound,
)
from deep_translator.validate import is_input_valid


class MicrosoftTranslator(BaseTranslator):
    """
    Translator that wraps the Microsoft Translator REST API to translate word(s).
    """

    def __init__(
        self,
        source: str = "auto",
        target: str = "en",
        api_key: Optional[str] = os.getenv(MSFT_ENV_VAR, None),
        region: Optional[str] = None,
        proxies: Optional[dict] = None,
        **kwargs,
    ):
        """
        @params api_key and target are the required params
        @param source: source language, forwarded to BaseTranslator
        @param target: target language, forwarded to BaseTranslator
        @param api_key: your Microsoft API key
        @param region: your Microsoft Location (optional)
        @param proxies: proxies mapping handed to every `requests` call
        @raise ApiKeyException: when no api key is available
        """
        # NOTE: the api_key default is evaluated once, when this module is
        # imported, so the environment variable must be set before import.
        if not api_key:
            raise ApiKeyException(env_var=MSFT_ENV_VAR)

        self.api_key = api_key
        self.proxies = proxies
        # always define the attribute so that reading `self.region` never
        # raises AttributeError when no region was given
        self.region = region
        self.headers = {
            "Ocp-Apim-Subscription-Key": self.api_key,
            "Content-type": "application/json",
        }
        # parameter region is not required but very common and goes to headers if passed
        if region:
            self.headers["Ocp-Apim-Subscription-Region"] = region
        # `self.proxies` must be assigned before this call because
        # `_get_supported_languages` reads it
        super().__init__(
            base_url=BASE_URLS.get("MICROSOFT_TRANSLATE"),
            source=source,
            target=target,
            languages=self._get_supported_languages(),
            **kwargs,
        )

    def _get_supported_languages(self):
        """
        fetch the languages currently supported by the Microsoft translator
        @return: dict whose keys are the lower-cased language names and whose
            values are the lower-cased language codes
        """
        # a common variable used in the other translators would be: MICROSOFT_CODES_TO_LANGUAGES
        microsoft_languages_api_url = (
            "https://api.cognitive.microsofttranslator.com/languages"
            "?api-version=3.0&scope=translation"
        )
        # go through the same proxies as the translate request, otherwise the
        # constructor fails in environments that can only reach the API via proxy
        microsoft_languages_response = requests.get(
            microsoft_languages_api_url,
            proxies=self.proxies,
        )
        translation_dict = microsoft_languages_response.json()["translation"]

        return {
            details["name"].lower(): code.lower()
            for code, details in translation_dict.items()
        }

    def translate(self, text: str, **kwargs) -> str:
        """
        function that uses microsoft translate to translate a text
        @param text: desired text to translate
        @return: str: translated text (several translations are joined by newlines)
        @raise TranslationNotFound: when the HTTP request could not be performed
        @raise MicrosoftAPIerror: when the API answers with an error object
        """
        # a body must be a list of dicts to process multiple texts;
        # I have not added multiple text processing here since it is covered by the translate_batch method
        if not is_input_valid(text):
            # nothing is sent to the API in this case; the implicit None of the
            # original implementation is kept
            return None

        # `_url_params` is not defined in this class
        # (presumably provided by BaseTranslator - TODO confirm)
        self._url_params["from"] = self._source
        self._url_params["to"] = self._target

        valid_microsoft_json = [{"text": text}]
        try:
            response = requests.post(
                self._base_url,
                params=self._url_params,
                headers=self.headers,
                json=valid_microsoft_json,
                proxies=self.proxies,
            )
        except requests.exceptions.RequestException as exc:
            logging.warning(f"Returned error: {type(exc).__name__}")
            # keep the original failure attached as the cause for debugging
            raise TranslationNotFound(text) from exc

        if response is None:
            raise TranslationNotFound(text)

        # decode the body once instead of re-parsing it for every check
        payload = response.json()

        # Where Microsoft API responds with an api error, it returns a dict in response.json()
        if isinstance(payload, dict):
            raise MicrosoftAPIerror(payload["error"])
        # Where it responds with a translation, its response.json() is a list
        # e.g. [{'translations': [{'text':'Hello world!', 'to': 'en'}]}]
        if isinstance(payload, list):
            return "\n".join(
                item["text"] for item in payload[0]["translations"]
            )
        return None

    def translate_file(self, path: str, **kwargs) -> str:
        """
        translate from a file
        @param path: path to file
        @return: translated text
        """
        return self._translate_file(path, **kwargs)

    def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a batch of texts
        @param batch: list of texts to translate
        @return: list of translations
        """
        return self._translate_batch(batch, **kwargs)