# Viewer metadata carried over from the page this file was copied from: file size 1,338 bytes, revision c8a32e7.
# Poetry package metadata for the marker-pdf distribution.
[tool.poetry]
name = "marker-pdf"
version = "0.2.5"
description = "Convert PDF to markdown with high speed and accuracy."
license = "GPL-3.0-or-later"
readme = "README.md"
repository = "https://github.com/VikParuchuri/marker"
keywords = ["pdf", "markdown", "ocr", "nlp"]
# NOTE(review): the address below looks like a scraper redaction placeholder
# rather than a real email address - restore the real one before publishing.
authors = ["Vik Paruchuri <[email protected]>"]
# The importable package shipped in the distribution.
packages = [{ include = "marker" }]
# Extra top-level files bundled alongside the package. The three .py modules
# listed here are the targets of the [tool.poetry.scripts] entry points.
include = [
    "convert.py",
    "convert_single.py",
    "chunk_convert.sh",
    "chunk_convert.py",
]

# Runtime dependencies. The interpreter constraint comes first; the remaining
# packages are listed alphabetically (case-insensitive).
[tool.poetry.dependencies]
# Python 3.9 through 3.12, skipping the single release 3.9.7.
python = ">=3.9,<3.13,!=3.9.7"
filetype = "^1.2.0"
ftfy = "^6.1.1"
grpcio = "^1.63.0"
numpy = "^1.26.1"
pdftext = "^0.3.7"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
python-dotenv = "^1.0.0"
rapidfuzz = "^3.8.1"
ray = "^2.20.0"
regex = "^2024.4.28"
scikit-learn = "^1.3.2"
surya-ocr = "^0.4.3"
tabulate = "^0.9.0"
texify = "^0.1.8"
# Issue with torch 2.3.0 and vision models:
# https://github.com/pytorch/pytorch/issues/121834
# NOTE(review): a caret range on 2.2.2 still admits 2.3.0 - confirm whether
# that release should be excluded explicitly.
torch = "^2.2.2"
tqdm = "^4.66.1"
# 4.36.2 is needed because of issues with donut models and later versions.
# NOTE(review): a caret range on 4.36.2 still admits later 4.x releases -
# confirm whether an upper bound or an exact pin was intended.
transformers = "^4.36.2"

# Dependencies belonging to the "dev" dependency group; kept separate from the
# runtime requirements declared in [tool.poetry.dependencies].
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"

# Console entry points, written as command = "module:callable".
# Each target is a top-level module (convert, convert_single, chunk_convert)
# rather than something inside the "marker" package; the matching .py files
# are listed under `include` in [tool.poetry].
# NOTE(review): the `main` callables are not visible from this file - confirm
# each module defines one.
[tool.poetry.scripts]
marker = "convert:main"
marker_single = "convert_single:main"
marker_chunk_convert = "chunk_convert:main"

# PEP 517 build configuration: builds are delegated to poetry-core.
[build-system]
# A lower bound keeps build frontends from resolving a pre-1.0 poetry-core;
# this is the form the Poetry documentation uses for the build requirement.
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"