Spaces:
Running
Running
File size: 726 Bytes
54f5afe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
"""Test Markdown conversion."""
from pathlib import Path
from raglite._markdown import document_to_markdown
def test_pdf_with_missing_font_sizes() -> None:
    """Check that heading levels are recovered from a PDF lacking font size data.

    The fixture ``specrel.pdf`` (Einstein's special relativity paper) sits next to
    this test module; its parsed font sizes are all equal to 1. The Markdown
    returned by ``document_to_markdown`` must nevertheless begin with the title
    as a level-1 heading, the byline as a level-2 heading, and then the opening
    words of the body text.
    """
    # The text the converted document is required to start with. The surrounding
    # newlines exist only for layout and are removed by ``strip()``.
    # NOTE(review): this literal is whitespace-sensitive -- confirm its line breaks
    # and spacing against the converter's actual output before editing it.
    expected_prefix = """
# ON THE ELECTRODYNAMICS OF MOVING BODIES
## By A. EINSTEIN June 30, 1905
It is known that Maxwell
""".strip()

    # Locate the fixture in the same directory as this test file.
    pdf_path = Path(__file__).with_name("specrel.pdf")

    # Convert the PDF; heading levels have to be reconstructed without usable
    # font size information.
    markdown = document_to_markdown(pdf_path)

    assert markdown.startswith(expected_prefix)
|