# Package metadata read by Poetry when building and publishing the project.
[tool.poetry]
name = "marker-pdf"
version = "0.2.5"
description = "Convert PDF to markdown with high speed and accuracy."
# NOTE(review): the text inside <...> looks like an email-obfuscation
# placeholder rather than a real address - confirm and restore it.
authors = ["Vik Paruchuri <[email protected]>"]
readme = "README.md"
license = "GPL-3.0-or-later"
repository = "https://github.com/VikParuchuri/marker"
keywords = ["pdf", "markdown", "ocr", "nlp"]
# The importable package shipped in the distribution.
packages = [
    { include = "marker" },
]
# Extra top-level files bundled next to the package. The Python modules among
# them are the targets of the entry points in [tool.poetry.scripts].
include = [
    "convert.py",
    "convert_single.py",
    "chunk_convert.sh",
    "chunk_convert.py",
]
# Runtime dependencies. A caret constraint ("^X.Y.Z") admits later releases
# that keep the left-most non-zero version component unchanged.
[tool.poetry.dependencies]
python = ">=3.9,<3.13,!=3.9.7"
scikit-learn = "^1.3.2"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
# 4.36.2 needed because of issues with donut models and later versions.
# NOTE(review): the caret range still admits later 4.x releases - confirm
# whether an upper bound or an exact pin is intended.
transformers = "^4.36.2"
numpy = "^1.26.1"
python-dotenv = "^1.0.0"
# Issue with torch 2.3.0 and vision models -
# https://github.com/pytorch/pytorch/issues/121834
# NOTE(review): "^2.2.2" still permits 2.3.0; append ",!=2.3.0" if that
# release has to be excluded.
torch = "^2.2.2"
ray = "^2.20.0"
tqdm = "^4.66.1"
tabulate = "^0.9.0"
ftfy = "^6.1.1"
texify = "^0.1.8"
rapidfuzz = "^3.8.1"
surya-ocr = "^0.4.3"
filetype = "^1.2.0"
regex = "^2024.4.28"
pdftext = "^0.3.7"
grpcio = "^1.63.0"
# Development-only dependency group; Poetry does not add these to the
# published package's requirements.
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
# Console entry points, each written as "module:function". The modules are
# the top-level .py files listed under `include` in [tool.poetry].
[tool.poetry.scripts]
marker = "convert:main"
marker_single = "convert_single:main"
marker_chunk_convert = "chunk_convert:main"
# PEP 517 build configuration: the project is built with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"