Nidhal Baccouri committed on
Commit
a22d18a
·
1 Parent(s): 4d4c29a

added support for translating docx and pdf files

Browse files
deep_translator/base.py CHANGED
@@ -10,6 +10,8 @@ from deep_translator.exceptions import (
10
  InvalidSourceOrTargetLanguage,
11
  LanguageNotSupportedException,
12
  )
 
 
13
 
14
 
15
  class BaseTranslator(ABC):
@@ -123,6 +125,17 @@ class BaseTranslator(ABC):
123
  """
124
  return NotImplemented("You need to implement the translate method!")
125
 
 
 
 
 
 
 
 
 
 
 
 
126
  def _translate_file(self, path: str, **kwargs) -> str:
127
  """
128
  translate directly from file
@@ -131,12 +144,25 @@ class BaseTranslator(ABC):
131
  @param kwargs: additional args
132
  @return: str
133
  """
134
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  with open(path, "r", encoding="utf-8") as f:
136
  text = f.read().strip()
137
- return self.translate(text)
138
- except Exception as e:
139
- raise e
140
 
141
  def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
142
  """
 
10
  InvalidSourceOrTargetLanguage,
11
  LanguageNotSupportedException,
12
  )
13
+ from pathlib import Path
14
+ from bs4 import BeautifulSoup as bs
15
 
16
 
17
  class BaseTranslator(ABC):
 
125
  """
126
  return NotImplemented("You need to implement the translate method!")
127
 
128
def _read_docx(self, f: str):
    """
    extract the text of a docx file
    @param f: path to the docx file
    @return: whatever docx2txt.process returns for that path
    """
    # imported inside the method, so docx2txt is only loaded when this is called
    from docx2txt import process as extract_docx_text

    return extract_docx_text(f)
131
+
132
def _read_pdf(self, f: str) -> str:
    """
    extract the text of a pdf file
    @param f: path to the pdf file
    @return: text of all pages, in page order, joined by newlines
             (empty string if the pdf has no pages)
    """
    # imported inside the method, so pypdf is only loaded when this is called
    import pypdf

    reader = pypdf.PdfReader(f)
    # Read every page: the previous version used reader.pages[0] only, which
    # silently dropped all pages after the first and raised IndexError on a
    # pdf without pages. `or ""` guards against a page yielding no text.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
138
+
139
  def _translate_file(self, path: str, **kwargs) -> str:
140
  """
141
  translate directly from file
 
144
  @param kwargs: additional args
145
  @return: str
146
  """
147
+ if not isinstance(path, Path):
148
+ path = Path(path)
149
+
150
+ if not path.exists():
151
+ print("Path to the file is wrong!")
152
+ exit(1)
153
+
154
+ ext = path.suffix
155
+
156
+ if ext == '.docx':
157
+ text = self._read_docx(f=str(path))
158
+
159
+ elif ext == '.pdf':
160
+ text = self._read_pdf(f=str(path))
161
+ else:
162
  with open(path, "r", encoding="utf-8") as f:
163
  text = f.read().strip()
164
+
165
+ return self.translate(text)
 
166
 
167
  def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
168
  """
deep_translator/google.py CHANGED
@@ -121,6 +121,6 @@ class GoogleTranslator(BaseTranslator):
121
 
122
 
123
  if __name__ == "__main__":
124
- trans = GoogleTranslator(source="auto", target="zh-CN")
125
- res = trans.translate("good")
126
  print("translation: ", res)
 
121
 
122
 
123
  if __name__ == "__main__":
124
+ trans = GoogleTranslator(source="auto", target="de")
125
+ res = trans.translate_file(path='/home/nidhal/Documents/translate.pdf')
126
  print("translation: ", res)
poetry.lock CHANGED
@@ -1,10 +1,10 @@
1
  [[package]]
2
  name = "alabaster"
3
- version = "0.7.12"
4
  description = "A configurable sidebar-enabled Sphinx theme"
5
  category = "dev"
6
  optional = false
7
- python-versions = "*"
8
 
9
  [[package]]
10
  name = "atomicwrites"
@@ -16,21 +16,23 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
16
 
17
  [[package]]
18
  name = "attrs"
19
- version = "22.1.0"
20
  description = "Classes Without Boilerplate"
21
  category = "dev"
22
  optional = false
23
- python-versions = ">=3.5"
24
 
25
  [package.extras]
26
- dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
27
- docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
28
- tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
29
- tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "mypy (>=0.900,!=0.940)", "pytest-mypy-plugins", "cloudpickle"]
 
 
30
 
31
  [[package]]
32
  name = "babel"
33
- version = "2.10.3"
34
  description = "Internationalization utilities"
35
  category = "dev"
36
  optional = false
@@ -41,7 +43,7 @@ pytz = ">=2015.7"
41
 
42
  [[package]]
43
  name = "beautifulsoup4"
44
- version = "4.11.1"
45
  description = "Screen-scraping library"
46
  category = "main"
47
  optional = false
@@ -56,11 +58,11 @@ lxml = ["lxml"]
56
 
57
  [[package]]
58
  name = "black"
59
- version = "22.8.0"
60
  description = "The uncompromising code formatter."
61
  category = "dev"
62
  optional = false
63
- python-versions = ">=3.6.2"
64
 
65
  [package.dependencies]
66
  click = ">=8.0.0"
@@ -79,7 +81,7 @@ uvloop = ["uvloop (>=0.15.2)"]
79
 
80
  [[package]]
81
  name = "bleach"
82
- version = "5.0.1"
83
  description = "An easy safelist-based HTML-sanitizing tool."
84
  category = "dev"
85
  optional = false
@@ -91,11 +93,10 @@ webencodings = "*"
91
 
92
  [package.extras]
93
  css = ["tinycss2 (>=1.1.0,<1.2)"]
94
- dev = ["build (==0.8.0)", "flake8 (==4.0.1)", "hashin (==0.17.0)", "pip-tools (==6.6.2)", "pytest (==7.1.2)", "Sphinx (==4.3.2)", "tox (==3.25.0)", "twine (==4.0.1)", "wheel (==0.37.1)", "black (==22.3.0)", "mypy (==0.961)"]
95
 
96
  [[package]]
97
  name = "certifi"
98
- version = "2022.9.14"
99
  description = "Python package for providing Mozilla's CA Bundle."
100
  category = "main"
101
  optional = false
@@ -114,14 +115,11 @@ pycparser = "*"
114
 
115
  [[package]]
116
  name = "charset-normalizer"
117
- version = "2.1.1"
118
  description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
119
  category = "main"
120
  optional = false
121
- python-versions = ">=3.6.0"
122
-
123
- [package.extras]
124
- unicode_backport = ["unicodedata2"]
125
 
126
  [[package]]
127
  name = "click"
@@ -148,11 +146,11 @@ click = "*"
148
 
149
  [[package]]
150
  name = "colorama"
151
- version = "0.4.5"
152
  description = "Cross-platform colored terminal text."
153
  category = "dev"
154
  optional = false
155
- python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
156
 
157
  [[package]]
158
  name = "coverage"
@@ -167,7 +165,7 @@ toml = ["toml"]
167
 
168
  [[package]]
169
  name = "cryptography"
170
- version = "38.0.1"
171
  description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
172
  category = "dev"
173
  optional = false
@@ -177,9 +175,9 @@ python-versions = ">=3.6"
177
  cffi = ">=1.12"
178
 
179
  [package.extras]
180
- docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"]
181
  docstest = ["pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"]
182
- pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"]
183
  sdist = ["setuptools-rust (>=0.11.4)"]
184
  ssh = ["bcrypt (>=3.1.5)"]
185
  test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"]
@@ -192,6 +190,14 @@ category = "dev"
192
  optional = false
193
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
194
 
 
 
 
 
 
 
 
 
195
  [[package]]
196
  name = "dotty-dict"
197
  version = "1.3.1"
@@ -202,18 +208,18 @@ python-versions = ">=3.5,<4.0"
202
 
203
  [[package]]
204
  name = "gitdb"
205
- version = "4.0.9"
206
  description = "Git Object Database"
207
  category = "dev"
208
  optional = false
209
- python-versions = ">=3.6"
210
 
211
  [package.dependencies]
212
  smmap = ">=3.0.1,<6"
213
 
214
  [[package]]
215
  name = "gitpython"
216
- version = "3.1.27"
217
  description = "GitPython is a python library used to interact with Git repositories"
218
  category = "dev"
219
  optional = false
@@ -241,7 +247,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
241
 
242
  [[package]]
243
  name = "importlib-metadata"
244
- version = "4.12.0"
245
  description = "Read metadata from Python packages"
246
  category = "dev"
247
  optional = false
@@ -252,21 +258,36 @@ typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
252
  zipp = ">=0.5"
253
 
254
  [package.extras]
255
- docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"]
256
  perf = ["ipython"]
257
- testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  [[package]]
260
  name = "iniconfig"
261
- version = "1.1.1"
262
- description = "iniconfig: brain-dead simple config-ini parsing"
263
  category = "dev"
264
  optional = false
265
- python-versions = "*"
266
 
267
  [[package]]
268
  name = "invoke"
269
- version = "1.7.1"
270
  description = "Pythonic task execution"
271
  category = "dev"
272
  optional = false
@@ -274,21 +295,21 @@ python-versions = "*"
274
 
275
  [[package]]
276
  name = "isort"
277
- version = "5.10.1"
278
  description = "A Python utility / library to sort Python imports."
279
  category = "dev"
280
  optional = false
281
- python-versions = ">=3.6.1,<4.0"
282
 
283
  [package.extras]
284
- pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
285
- requirements_deprecated_finder = ["pipreqs", "pip-api"]
286
  colors = ["colorama (>=0.4.3,<0.5.0)"]
287
  plugins = ["setuptools"]
288
 
289
  [[package]]
290
  name = "jaraco.classes"
291
- version = "3.2.2"
292
  description = "Utility functions for Python class constructs"
293
  category = "dev"
294
  optional = false
@@ -298,8 +319,8 @@ python-versions = ">=3.7"
298
  more-itertools = "*"
299
 
300
  [package.extras]
301
- docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"]
302
- testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
303
 
304
  [[package]]
305
  name = "jeepney"
@@ -329,26 +350,28 @@ i18n = ["Babel (>=2.7)"]
329
 
330
  [[package]]
331
  name = "keyring"
332
- version = "23.9.3"
333
  description = "Store and access your passwords safely."
334
  category = "dev"
335
  optional = false
336
  python-versions = ">=3.7"
337
 
338
  [package.dependencies]
339
- importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""}
 
340
  "jaraco.classes" = "*"
341
  jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""}
342
- pywin32-ctypes = {version = "<0.1.0 || >0.1.0,<0.1.1 || >0.1.1", markers = "sys_platform == \"win32\""}
343
  SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
344
 
345
  [package.extras]
346
- docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"]
347
- testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
 
348
 
349
  [[package]]
350
  name = "markupsafe"
351
- version = "2.1.1"
352
  description = "Safely add untrusted strings to HTML/XML markup."
353
  category = "dev"
354
  optional = false
@@ -356,34 +379,31 @@ python-versions = ">=3.7"
356
 
357
  [[package]]
358
  name = "more-itertools"
359
- version = "8.14.0"
360
  description = "More routines for operating on iterables, beyond itertools"
361
  category = "dev"
362
  optional = false
363
- python-versions = ">=3.5"
364
 
365
  [[package]]
366
  name = "mypy-extensions"
367
- version = "0.4.3"
368
- description = "Experimental type system extensions for programs checked with the mypy typechecker."
369
  category = "dev"
370
  optional = false
371
- python-versions = "*"
372
 
373
  [[package]]
374
  name = "packaging"
375
- version = "21.3"
376
  description = "Core utilities for Python packages"
377
  category = "dev"
378
  optional = false
379
- python-versions = ">=3.6"
380
-
381
- [package.dependencies]
382
- pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
383
 
384
  [[package]]
385
  name = "pathspec"
386
- version = "0.10.1"
387
  description = "Utility library for gitignore style pattern matching of file paths."
388
  category = "dev"
389
  optional = false
@@ -391,26 +411,29 @@ python-versions = ">=3.7"
391
 
392
  [[package]]
393
  name = "pkginfo"
394
- version = "1.8.3"
395
- description = "Query metadatdata from sdists / bdists / installed packages."
396
  category = "dev"
397
  optional = false
398
- python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
399
 
400
  [package.extras]
401
- testing = ["coverage", "nose"]
402
 
403
  [[package]]
404
  name = "platformdirs"
405
- version = "2.5.2"
406
- description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
407
  category = "dev"
408
  optional = false
409
  python-versions = ">=3.7"
410
 
 
 
 
411
  [package.extras]
412
- docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"]
413
- test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"]
414
 
415
  [[package]]
416
  name = "pluggy"
@@ -445,7 +468,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
445
 
446
  [[package]]
447
  name = "pygments"
448
- version = "2.13.0"
449
  description = "Pygments is a syntax highlighting package written in Python."
450
  category = "dev"
451
  optional = false
@@ -455,15 +478,22 @@ python-versions = ">=3.6"
455
  plugins = ["importlib-metadata"]
456
 
457
  [[package]]
458
- name = "pyparsing"
459
- version = "3.0.9"
460
- description = "pyparsing module - Classes and methods to define and execute parsing grammars"
461
- category = "dev"
462
  optional = false
463
- python-versions = ">=3.6.8"
 
 
 
464
 
465
  [package.extras]
466
- diagrams = ["railroad-diagrams", "jinja2"]
 
 
 
 
467
 
468
  [[package]]
469
  name = "pytest"
@@ -501,7 +531,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
501
 
502
  [[package]]
503
  name = "python-gitlab"
504
- version = "3.9.0"
505
  description = "Interact with GitLab API"
506
  category = "dev"
507
  optional = false
@@ -509,7 +539,7 @@ python-versions = ">=3.7.0"
509
 
510
  [package.dependencies]
511
  requests = ">=2.25.0"
512
- requests-toolbelt = ">=0.9.1"
513
 
514
  [package.extras]
515
  autocompletion = ["argcomplete (>=1.10.0,<3)"]
@@ -517,7 +547,7 @@ yaml = ["PyYaml (>=5.2)"]
517
 
518
  [[package]]
519
  name = "python-semantic-release"
520
- version = "7.31.4"
521
  description = "Automatic Semantic Versioning for Python projects"
522
  category = "dev"
523
  optional = false
@@ -540,11 +570,11 @@ twine = ">=3,<4"
540
  dev = ["tox", "isort", "black"]
541
  docs = ["Sphinx (==1.3.6)", "Jinja2 (==3.0.3)"]
542
  mypy = ["mypy", "types-requests"]
543
- test = ["coverage (>=5,<6)", "pytest (>=5,<6)", "pytest-xdist (>=1,<2)", "pytest-mock (>=2,<3)", "responses (==0.13.3)", "mock (==1.3.0)"]
544
 
545
  [[package]]
546
  name = "pytz"
547
- version = "2022.2.1"
548
  description = "World timezone definitions, modern and historical"
549
  category = "dev"
550
  optional = false
@@ -560,7 +590,7 @@ python-versions = "*"
560
 
561
  [[package]]
562
  name = "readme-renderer"
563
- version = "37.1"
564
  description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse"
565
  category = "dev"
566
  optional = false
@@ -576,7 +606,7 @@ md = ["cmarkgfm (>=0.8.0)"]
576
 
577
  [[package]]
578
  name = "requests"
579
- version = "2.28.1"
580
  description = "Python HTTP for Humans."
581
  category = "main"
582
  optional = false
@@ -584,7 +614,7 @@ python-versions = ">=3.7, <4"
584
 
585
  [package.dependencies]
586
  certifi = ">=2017.4.17"
587
- charset-normalizer = ">=2,<3"
588
  idna = ">=2.5,<4"
589
  urllib3 = ">=1.21.1,<1.27"
590
 
@@ -594,11 +624,11 @@ use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"]
594
 
595
  [[package]]
596
  name = "requests-toolbelt"
597
- version = "0.9.1"
598
  description = "A utility belt for advanced users of python-requests"
599
  category = "dev"
600
  optional = false
601
- python-versions = "*"
602
 
603
  [package.dependencies]
604
  requests = ">=2.0.1,<3.0.0"
@@ -787,11 +817,11 @@ python-versions = ">=3.7"
787
 
788
  [[package]]
789
  name = "tomlkit"
790
- version = "0.11.4"
791
  description = "Style preserving TOML library"
792
  category = "dev"
793
  optional = false
794
- python-versions = ">=3.6,<4.0"
795
 
796
  [[package]]
797
  name = "tqdm"
@@ -840,19 +870,19 @@ python-versions = ">=3.6"
840
 
841
  [[package]]
842
  name = "typing-extensions"
843
- version = "4.3.0"
844
  description = "Backported and Experimental Type Hints for Python 3.7+"
845
- category = "dev"
846
  optional = false
847
  python-versions = ">=3.7"
848
 
849
  [[package]]
850
  name = "urllib3"
851
- version = "1.26.12"
852
  description = "HTTP library with thread-safe connection pooling, file post, and more."
853
  category = "main"
854
  optional = false
855
- python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4"
856
 
857
  [package.extras]
858
  brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"]
@@ -869,20 +899,20 @@ python-versions = "*"
869
 
870
  [[package]]
871
  name = "zipp"
872
- version = "3.8.1"
873
  description = "Backport of pathlib-compatible object wrapper for zip files"
874
  category = "dev"
875
  optional = false
876
  python-versions = ">=3.7"
877
 
878
  [package.extras]
879
- docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"]
880
- testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
881
 
882
  [metadata]
883
  lock-version = "1.1"
884
  python-versions = "^3.7"
885
- content-hash = "054ce1ccb3c4971456c37b9161434ede7867a152627dd1fd40a2b7b7b83d3bf2"
886
 
887
  [metadata.files]
888
  alabaster = []
@@ -901,12 +931,14 @@ colorama = []
901
  coverage = []
902
  cryptography = []
903
  docutils = []
 
904
  dotty-dict = []
905
  gitdb = []
906
  gitpython = []
907
  idna = []
908
  imagesize = []
909
  importlib-metadata = []
 
910
  iniconfig = []
911
  invoke = []
912
  isort = []
@@ -925,7 +957,7 @@ pluggy = []
925
  py = []
926
  pycparser = []
927
  pygments = []
928
- pyparsing = []
929
  pytest = []
930
  pytest-runner = []
931
  python-gitlab = []
 
1
  [[package]]
2
  name = "alabaster"
3
+ version = "0.7.13"
4
  description = "A configurable sidebar-enabled Sphinx theme"
5
  category = "dev"
6
  optional = false
7
+ python-versions = ">=3.6"
8
 
9
  [[package]]
10
  name = "atomicwrites"
 
16
 
17
  [[package]]
18
  name = "attrs"
19
+ version = "22.2.0"
20
  description = "Classes Without Boilerplate"
21
  category = "dev"
22
  optional = false
23
+ python-versions = ">=3.6"
24
 
25
  [package.extras]
26
+ cov = ["attrs", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"]
27
+ dev = ["attrs"]
28
+ docs = ["furo", "sphinx", "myst-parser", "zope.interface", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"]
29
+ tests = ["attrs", "zope.interface"]
30
+ tests-no-zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"]
31
+ tests_no_zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"]
32
 
33
  [[package]]
34
  name = "babel"
35
+ version = "2.11.0"
36
  description = "Internationalization utilities"
37
  category = "dev"
38
  optional = false
 
43
 
44
  [[package]]
45
  name = "beautifulsoup4"
46
+ version = "4.11.2"
47
  description = "Screen-scraping library"
48
  category = "main"
49
  optional = false
 
58
 
59
  [[package]]
60
  name = "black"
61
+ version = "22.12.0"
62
  description = "The uncompromising code formatter."
63
  category = "dev"
64
  optional = false
65
+ python-versions = ">=3.7"
66
 
67
  [package.dependencies]
68
  click = ">=8.0.0"
 
81
 
82
  [[package]]
83
  name = "bleach"
84
+ version = "6.0.0"
85
  description = "An easy safelist-based HTML-sanitizing tool."
86
  category = "dev"
87
  optional = false
 
93
 
94
  [package.extras]
95
  css = ["tinycss2 (>=1.1.0,<1.2)"]
 
96
 
97
  [[package]]
98
  name = "certifi"
99
+ version = "2022.12.7"
100
  description = "Python package for providing Mozilla's CA Bundle."
101
  category = "main"
102
  optional = false
 
115
 
116
  [[package]]
117
  name = "charset-normalizer"
118
+ version = "3.0.1"
119
  description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
120
  category = "main"
121
  optional = false
122
+ python-versions = "*"
 
 
 
123
 
124
  [[package]]
125
  name = "click"
 
146
 
147
  [[package]]
148
  name = "colorama"
149
+ version = "0.4.6"
150
  description = "Cross-platform colored terminal text."
151
  category = "dev"
152
  optional = false
153
+ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
154
 
155
  [[package]]
156
  name = "coverage"
 
165
 
166
  [[package]]
167
  name = "cryptography"
168
+ version = "39.0.0"
169
  description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
170
  category = "dev"
171
  optional = false
 
175
  cffi = ">=1.12"
176
 
177
  [package.extras]
178
+ docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1,!=5.2.0,!=5.2.0.post0)", "sphinx-rtd-theme"]
179
  docstest = ["pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"]
180
+ pep8test = ["black", "ruff"]
181
  sdist = ["setuptools-rust (>=0.11.4)"]
182
  ssh = ["bcrypt (>=3.1.5)"]
183
  test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"]
 
190
  optional = false
191
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
192
 
193
+ [[package]]
194
+ name = "docx2txt"
195
+ version = "0.8"
196
+ description = "A pure python-based utility to extract text and images from docx files."
197
+ category = "main"
198
+ optional = false
199
+ python-versions = "*"
200
+
201
  [[package]]
202
  name = "dotty-dict"
203
  version = "1.3.1"
 
208
 
209
  [[package]]
210
  name = "gitdb"
211
+ version = "4.0.10"
212
  description = "Git Object Database"
213
  category = "dev"
214
  optional = false
215
+ python-versions = ">=3.7"
216
 
217
  [package.dependencies]
218
  smmap = ">=3.0.1,<6"
219
 
220
  [[package]]
221
  name = "gitpython"
222
+ version = "3.1.30"
223
  description = "GitPython is a python library used to interact with Git repositories"
224
  category = "dev"
225
  optional = false
 
247
 
248
  [[package]]
249
  name = "importlib-metadata"
250
+ version = "6.0.0"
251
  description = "Read metadata from Python packages"
252
  category = "dev"
253
  optional = false
 
258
  zipp = ">=0.5"
259
 
260
  [package.extras]
261
+ docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "sphinx-lint", "jaraco.tidelift (>=1.4)"]
262
  perf = ["ipython"]
263
+ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8", "importlib-resources (>=1.3)"]
264
+
265
+ [[package]]
266
+ name = "importlib-resources"
267
+ version = "5.10.2"
268
+ description = "Read resources from Python packages"
269
+ category = "dev"
270
+ optional = false
271
+ python-versions = ">=3.7"
272
+
273
+ [package.dependencies]
274
+ zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
275
+
276
+ [package.extras]
277
+ docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "sphinx-lint", "jaraco.tidelift (>=1.4)"]
278
+ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"]
279
 
280
  [[package]]
281
  name = "iniconfig"
282
+ version = "2.0.0"
283
+ description = "brain-dead simple config-ini parsing"
284
  category = "dev"
285
  optional = false
286
+ python-versions = ">=3.7"
287
 
288
  [[package]]
289
  name = "invoke"
290
+ version = "1.7.3"
291
  description = "Pythonic task execution"
292
  category = "dev"
293
  optional = false
 
295
 
296
  [[package]]
297
  name = "isort"
298
+ version = "5.11.5"
299
  description = "A Python utility / library to sort Python imports."
300
  category = "dev"
301
  optional = false
302
+ python-versions = ">=3.7.0"
303
 
304
  [package.extras]
305
+ pipfile-deprecated-finder = ["pipreqs", "requirementslib", "pip-shims (>=0.5.2)"]
306
+ requirements-deprecated-finder = ["pipreqs", "pip-api"]
307
  colors = ["colorama (>=0.4.3,<0.5.0)"]
308
  plugins = ["setuptools"]
309
 
310
  [[package]]
311
  name = "jaraco.classes"
312
+ version = "3.2.3"
313
  description = "Utility functions for Python class constructs"
314
  category = "dev"
315
  optional = false
 
319
  more-itertools = "*"
320
 
321
  [package.extras]
322
+ docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"]
323
+ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"]
324
 
325
  [[package]]
326
  name = "jeepney"
 
350
 
351
  [[package]]
352
  name = "keyring"
353
+ version = "23.13.1"
354
  description = "Store and access your passwords safely."
355
  category = "dev"
356
  optional = false
357
  python-versions = ">=3.7"
358
 
359
  [package.dependencies]
360
+ importlib-metadata = {version = ">=4.11.4", markers = "python_version < \"3.12\""}
361
+ importlib-resources = {version = "*", markers = "python_version < \"3.9\""}
362
  "jaraco.classes" = "*"
363
  jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""}
364
+ pywin32-ctypes = {version = ">=0.2.0", markers = "sys_platform == \"win32\""}
365
  SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
366
 
367
  [package.extras]
368
+ completion = ["shtab"]
369
+ docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"]
370
+ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"]
371
 
372
  [[package]]
373
  name = "markupsafe"
374
+ version = "2.1.2"
375
  description = "Safely add untrusted strings to HTML/XML markup."
376
  category = "dev"
377
  optional = false
 
379
 
380
  [[package]]
381
  name = "more-itertools"
382
+ version = "9.0.0"
383
  description = "More routines for operating on iterables, beyond itertools"
384
  category = "dev"
385
  optional = false
386
+ python-versions = ">=3.7"
387
 
388
  [[package]]
389
  name = "mypy-extensions"
390
+ version = "1.0.0"
391
+ description = "Type system extensions for programs checked with the mypy type checker."
392
  category = "dev"
393
  optional = false
394
+ python-versions = ">=3.5"
395
 
396
  [[package]]
397
  name = "packaging"
398
+ version = "23.0"
399
  description = "Core utilities for Python packages"
400
  category = "dev"
401
  optional = false
402
+ python-versions = ">=3.7"
 
 
 
403
 
404
  [[package]]
405
  name = "pathspec"
406
+ version = "0.11.0"
407
  description = "Utility library for gitignore style pattern matching of file paths."
408
  category = "dev"
409
  optional = false
 
411
 
412
  [[package]]
413
  name = "pkginfo"
414
+ version = "1.9.6"
415
+ description = "Query metadata from sdists / bdists / installed packages."
416
  category = "dev"
417
  optional = false
418
+ python-versions = ">=3.6"
419
 
420
  [package.extras]
421
+ testing = ["pytest", "pytest-cov"]
422
 
423
  [[package]]
424
  name = "platformdirs"
425
+ version = "2.6.2"
426
+ description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
427
  category = "dev"
428
  optional = false
429
  python-versions = ">=3.7"
430
 
431
+ [package.dependencies]
432
+ typing-extensions = {version = ">=4.4", markers = "python_version < \"3.8\""}
433
+
434
  [package.extras]
435
+ docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx-autodoc-typehints (>=1.19.5)", "sphinx (>=5.3)"]
436
+ test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest (>=7.2)"]
437
 
438
  [[package]]
439
  name = "pluggy"
 
468
 
469
  [[package]]
470
  name = "pygments"
471
+ version = "2.14.0"
472
  description = "Pygments is a syntax highlighting package written in Python."
473
  category = "dev"
474
  optional = false
 
478
  plugins = ["importlib-metadata"]
479
 
480
  [[package]]
481
+ name = "pypdf"
482
+ version = "3.3.0"
483
+ description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
484
+ category = "main"
485
  optional = false
486
+ python-versions = ">=3.6"
487
+
488
+ [package.dependencies]
489
+ typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
490
 
491
  [package.extras]
492
+ crypto = ["pycryptodome"]
493
+ dev = ["black", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "flit", "wheel"]
494
+ docs = ["sphinx", "sphinx-rtd-theme", "myst-parser"]
495
+ full = ["pycryptodome", "pillow"]
496
+ image = ["pillow"]
497
 
498
  [[package]]
499
  name = "pytest"
 
531
 
532
  [[package]]
533
  name = "python-gitlab"
534
+ version = "3.13.0"
535
  description = "Interact with GitLab API"
536
  category = "dev"
537
  optional = false
 
539
 
540
  [package.dependencies]
541
  requests = ">=2.25.0"
542
+ requests-toolbelt = ">=0.10.1"
543
 
544
  [package.extras]
545
  autocompletion = ["argcomplete (>=1.10.0,<3)"]
 
547
 
548
  [[package]]
549
  name = "python-semantic-release"
550
+ version = "7.33.1"
551
  description = "Automatic Semantic Versioning for Python projects"
552
  category = "dev"
553
  optional = false
 
570
  dev = ["tox", "isort", "black"]
571
  docs = ["Sphinx (==1.3.6)", "Jinja2 (==3.0.3)"]
572
  mypy = ["mypy", "types-requests"]
573
+ test = ["coverage (>=5,<6)", "pytest (>=7,<8)", "pytest-xdist (>=1,<2)", "pytest-mock (>=2,<3)", "responses (==0.13.3)", "mock (==1.3.0)"]
574
 
575
  [[package]]
576
  name = "pytz"
577
+ version = "2022.7.1"
578
  description = "World timezone definitions, modern and historical"
579
  category = "dev"
580
  optional = false
 
590
 
591
  [[package]]
592
  name = "readme-renderer"
593
+ version = "37.3"
594
  description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse"
595
  category = "dev"
596
  optional = false
 
606
 
607
  [[package]]
608
  name = "requests"
609
+ version = "2.28.2"
610
  description = "Python HTTP for Humans."
611
  category = "main"
612
  optional = false
 
614
 
615
  [package.dependencies]
616
  certifi = ">=2017.4.17"
617
+ charset-normalizer = ">=2,<4"
618
  idna = ">=2.5,<4"
619
  urllib3 = ">=1.21.1,<1.27"
620
 
 
624
 
625
  [[package]]
626
  name = "requests-toolbelt"
627
+ version = "0.10.1"
628
  description = "A utility belt for advanced users of python-requests"
629
  category = "dev"
630
  optional = false
631
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
632
 
633
  [package.dependencies]
634
  requests = ">=2.0.1,<3.0.0"
 
817
 
818
  [[package]]
819
  name = "tomlkit"
820
+ version = "0.11.6"
821
  description = "Style preserving TOML library"
822
  category = "dev"
823
  optional = false
824
+ python-versions = ">=3.6"
825
 
826
  [[package]]
827
  name = "tqdm"
 
870
 
871
  [[package]]
872
  name = "typing-extensions"
873
+ version = "4.4.0"
874
  description = "Backported and Experimental Type Hints for Python 3.7+"
875
+ category = "main"
876
  optional = false
877
  python-versions = ">=3.7"
878
 
879
  [[package]]
880
  name = "urllib3"
881
+ version = "1.26.14"
882
  description = "HTTP library with thread-safe connection pooling, file post, and more."
883
  category = "main"
884
  optional = false
885
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
886
 
887
  [package.extras]
888
  brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"]
 
899
 
900
  [[package]]
901
  name = "zipp"
902
+ version = "3.12.0"
903
  description = "Backport of pathlib-compatible object wrapper for zip files"
904
  category = "dev"
905
  optional = false
906
  python-versions = ">=3.7"
907
 
908
  [package.extras]
909
+ docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "sphinx-lint", "jaraco.tidelift (>=1.4)"]
910
+ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "jaraco.itertools", "func-timeout", "jaraco.functools", "more-itertools", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "pytest-flake8"]
911
 
912
  [metadata]
913
  lock-version = "1.1"
914
  python-versions = "^3.7"
915
+ content-hash = "cd34af7874c97d88a33945a0bdd55f8094141ff27937a19a1a08e6b232f6ecea"
916
 
917
  [metadata.files]
918
  alabaster = []
 
931
  coverage = []
932
  cryptography = []
933
  docutils = []
934
+ docx2txt = []
935
  dotty-dict = []
936
  gitdb = []
937
  gitpython = []
938
  idna = []
939
  imagesize = []
940
  importlib-metadata = []
941
+ importlib-resources = []
942
  iniconfig = []
943
  invoke = []
944
  isort = []
 
957
  py = []
958
  pycparser = []
959
  pygments = []
960
+ pypdf = []
961
  pytest = []
962
  pytest-runner = []
963
  python-gitlab = []
pyproject.toml CHANGED
@@ -31,6 +31,8 @@ dt = 'deep_translator.__main__:main'
31
  python = "^3.7"
32
  beautifulsoup4 = "^4.9.1"
33
  requests = "^2.23.0"
 
 
34
 
35
  [tool.poetry.dev-dependencies]
36
  wheel = "^0.38.4"
 
31
  python = "^3.7"
32
  beautifulsoup4 = "^4.9.1"
33
  requests = "^2.23.0"
34
+ docx2txt = {version = "^0.8", extras = ["docx"]}
35
+ pypdf = {version = "^3.3.0", extras = ["pdf"]}
36
 
37
  [tool.poetry.dev-dependencies]
38
  wheel = "^0.38.4"