File size: 5,625 Bytes
0d67af1
28c2eed
fa90b1d
 
7a05cd7
cb15d3a
7a05cd7
 
78d0ff1
283a7d3
 
 
 
0d1a764
 
58568a7
f06c14e
0d67af1
f06c14e
78d0ff1
 
 
0d89ff1
7ade3be
 
 
 
 
 
283a7d3
78d0ff1
f06c14e
 
 
 
d03a2fc
f651d07
 
4524f4c
 
 
 
 
2bbc526
f06c14e
 
 
58568a7
2bbc526
f06c14e
a0092fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d03a2fc
 
 
2bbc526
f06c14e
cb15d3a
 
2bbc526
cb15d3a
 
f06c14e
2bbc526
f651d07
2bbc526
f651d07
 
71d8e8b
 
 
 
283a7d3
 
 
f06c14e
7ade3be
2bbc526
6add4ab
7ade3be
 
 
2bbc526
7ade3be
cb15d3a
 
2bbc526
 
f651d07
f06c14e
7ade3be
f36740b
2bbc526
 
 
f36740b
78d0ff1
 
f651d07
 
78d0ff1
2bbc526
 
 
f06c14e
58568a7
7ade3be
f36740b
cb15d3a
 
f36740b
 
 
 
78d0ff1
f06c14e
a22d18a
 
cb15d3a
a22d18a
 
 
 
 
 
 
 
 
7ade3be
2bbc526
 
 
 
 
 
 
a22d18a
 
 
 
 
 
 
 
cb15d3a
 
a22d18a
 
cb15d3a
a22d18a
 
78d0ff1
2bbc526
a22d18a
 
eaf7d7b
7ade3be
2bbc526
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
"""base translator class"""

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from abc import ABC, abstractmethod
from pathlib import Path
from typing import List, Optional, Union

from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
    InvalidSourceOrTargetLanguage,
    LanguageNotSupportedException,
)


class BaseTranslator(ABC):
    """
    Abstract base class that the concrete translators derive from.

    It keeps the language table, resolves the source/target languages to
    their codes, and provides shared helpers to translate files and batches.
    Subclasses must implement `translate`.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        languages: dict = GOOGLE_LANGUAGES_TO_CODES,
        source: str = "auto",
        target: str = "en",
        payload_key: Optional[str] = None,
        element_tag: Optional[str] = None,
        element_query: Optional[dict] = None,
        **url_params,
    ):
        """
        @param base_url: url stored on the instance as `_base_url`
        @param languages: dictionary mapping language names to their codes
        @param source: source language to translate from (name, code or "auto")
        @param target: target language to translate to (name, code or "auto")
        @param payload_key: stored as `payload_key`; not used by this base class
        @param element_tag: stored as `_element_tag`; not used by this base class
        @param element_query: stored as `_element_query`; not used by this
        base class
        @param url_params: extra keyword arguments, stored as `_url_params`
        @raise InvalidSourceOrTargetLanguage: if source or target is empty
        @raise LanguageNotSupportedException: if source or target is neither a
        known language name nor a known code
        """
        self._base_url = base_url
        # NOTE: the languages dict is shared, not copied; it is only read here.
        self._languages = languages
        self._supported_languages = list(self._languages.keys())
        if not source:
            raise InvalidSourceOrTargetLanguage(source)
        if not target:
            raise InvalidSourceOrTargetLanguage(target)

        # unpacking consumes the generator, so validation happens right here
        self._source, self._target = self._map_language_to_code(source, target)
        self._url_params = url_params
        self._element_tag = element_tag
        self._element_query = element_query
        self.payload_key = payload_key
        super().__init__()

    @property
    def source(self):
        """source language as resolved by the constructor or set afterwards"""
        return self._source

    @source.setter
    def source(self, lang):
        # NOTE(review): unlike the constructor, the value is stored as given,
        # without validation or name-to-code mapping.
        self._source = lang

    @property
    def target(self):
        """target language as resolved by the constructor or set afterwards"""
        return self._target

    @target.setter
    def target(self, lang):
        # NOTE(review): unlike the constructor, the value is stored as given,
        # without validation or name-to-code mapping.
        self._target = lang

    def _type(self):
        """
        @return: name of the concrete translator class
        """
        return self.__class__.__name__

    def _map_language_to_code(self, *languages):
        """
        map each language to its corresponding code (abbreviation) if the
        language was passed by its full name by the user.
        This is a generator: nothing is checked until it is iterated.
        @param languages: languages given as names, codes or "auto"
        @return: generator yielding the code of each language ("auto" and
        codes are yielded unchanged)
        @raise LanguageNotSupportedException: if a language is not supported
        """
        for language in languages:
            if language in self._languages.values() or language == "auto":
                # already a code (or the special "auto" value)
                yield language
            elif language in self._languages:
                yield self._languages[language]
            else:
                raise LanguageNotSupportedException(
                    language,
                    message=f"No support for the provided language.\n"
                    f"Please select on of the supported languages:\n"
                    f"{self._languages}",
                )

    def _same_source_target(self) -> bool:
        """
        @return: True if the source and target languages are equal
        """
        return self._source == self._target

    def get_supported_languages(
        self, as_dict: bool = False, **kwargs
    ) -> Union[list, dict]:
        """
        return the languages supported by this translator
        @param as_dict: if True, the languages will be returned as a dictionary
        mapping languages to their abbreviations
        @param kwargs: accepted for compatibility, ignored here
        @return: list of language names, or the dict if as_dict is True
        """
        return self._languages if as_dict else self._supported_languages

    def is_language_supported(self, language: str, **kwargs) -> bool:
        """
        check if the language is supported by the translator
        @param language: a string for 1 language (name, code or "auto")
        @param kwargs: accepted for compatibility, ignored here
        @return: True if the language is supported, False otherwise
        """
        return (
            language == "auto"
            or language in self._languages
            or language in self._languages.values()
        )

    @abstractmethod
    def translate(self, text: str, **kwargs) -> str:
        """
        translate a text using a translator under the hood and return
        the translated text
        @param text: text to translate
        @param kwargs: additional arguments
        @return: str
        @raise NotImplementedError: always, when the base implementation is
        called directly (e.g. through super())
        """
        # NotImplemented is a non-callable singleton meant for binary
        # operators; the exception to raise here is NotImplementedError.
        raise NotImplementedError("You need to implement the translate method!")

    def _read_docx(self, f: str):
        """
        extract the text of a .docx file
        @param f: path to the file
        @return: whatever docx2txt.process returns for the file
        """
        # imported lazily so docx2txt is only required when actually used
        import docx2txt

        return docx2txt.process(f)

    def _read_pdf(self, f: str):
        """
        extract the text of the first page of a .pdf file
        @param f: path to the file
        @return: text extracted from the first page
        """
        # imported lazily so pypdf is only required when actually used
        import pypdf

        reader = pypdf.PdfReader(f)
        # NOTE(review): only the first page is read; the remaining pages are
        # ignored and an empty document fails on the index below.
        page = reader.pages[0]
        return page.extract_text()

    def _translate_file(self, path: Union[str, Path], **kwargs) -> str:
        """
        translate directly from file.
        .docx and .pdf files go through their dedicated readers, any other
        file is read as utf-8 text.
        @param path: path to the target file
        @type path: str or Path
        @param kwargs: additional args, forwarded to translate
        @return: str
        @raise SystemExit: with status 1 if the path does not exist
        """
        if not isinstance(path, Path):
            path = Path(path)

        if not path.exists():
            print("Path to the file is wrong!")
            # same outcome as the site-provided exit(1), but also available
            # when the site module is not loaded (python -S, frozen apps)
            raise SystemExit(1)

        ext = path.suffix

        if ext == ".docx":
            text = self._read_docx(f=str(path))
        elif ext == ".pdf":
            text = self._read_pdf(f=str(path))
        else:
            with open(path, "r", encoding="utf-8") as f:
                text = f.read().strip()

        # forward kwargs, consistently with _translate_batch
        return self.translate(text, **kwargs)

    def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
        """
        translate a list of texts, one translate call per text
        @param batch: list of texts you want to translate
        @param kwargs: additional args, forwarded to translate
        @return: list of translations, in the order of the input
        @raise Exception: if batch is empty or None
        """
        if not batch:
            raise Exception("Enter your text list that you want to translate")
        return [self.translate(text, **kwargs) for text in batch]