File size: 1,338 Bytes
c8a32e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
[tool.poetry]
# Distribution metadata.
name = "marker-pdf"
version = "0.2.5"
description = "Convert PDF to markdown with high speed and accuracy."
authors = [
    "Vik Paruchuri <[email protected]>",
]
readme = "README.md"
license = "GPL-3.0-or-later"
repository = "https://github.com/VikParuchuri/marker"
keywords = [
    "pdf",
    "markdown",
    "ocr",
    "nlp",
]
# Package directory shipped in the distribution.
packages = [{ include = "marker" }]
# Extra top-level files bundled with the package. The entry points in
# [tool.poetry.scripts] refer to the `convert`, `convert_single` and
# `chunk_convert` modules listed here.
include = [
    "convert.py",
    "convert_single.py",
    "chunk_convert.sh",
    "chunk_convert.py",
]
[tool.poetry.dependencies]
# Runtime requirements. A caret constraint (`^X.Y.Z`) allows upgrades that do
# not change the left-most non-zero version component.
python = ">=3.9,<3.13,!=3.9.7"
scikit-learn = "^1.3.2"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
# 4.36.2 needed because issues with donut models and later versions
# NOTE(review): the caret range still admits releases newer than 4.36.2;
# confirm whether an upper bound is intended.
transformers = "^4.36.2"
numpy = "^1.26.1"
python-dotenv = "^1.0.0"
# Issue with torch 2.3.0 and vision models - https://github.com/pytorch/pytorch/issues/121834
# Same range as ^2.2.2, with the affected 2.3.0 release explicitly excluded.
torch = ">=2.2.2,<3.0.0,!=2.3.0"
ray = "^2.20.0"
tqdm = "^4.66.1"
tabulate = "^0.9.0"
ftfy = "^6.1.1"
texify = "^0.1.8"
rapidfuzz = "^3.8.1"
surya-ocr = "^0.4.3"
filetype = "^1.2.0"
regex = "^2024.4.28"
pdftext = "^0.3.7"
grpcio = "^1.63.0"
[tool.poetry.group.dev.dependencies]
# Dependencies belonging to Poetry's `dev` group.
jupyter = "^1.0.0"
[tool.poetry.scripts]
# Command name = "module:function". Each target is a `main` function in one
# of the top-level modules listed under `include` in [tool.poetry].
marker = "convert:main"
marker_single = "convert_single:main"
marker_chunk_convert = "chunk_convert:main"
[build-system]
# Build backend declaration. The lower bound keeps build front-ends from
# selecting a pre-1.0 poetry-core release.
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"