File size: 726 Bytes
54f5afe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
"""Test Markdown conversion."""

from pathlib import Path

from raglite._markdown import document_to_markdown


def test_pdf_with_missing_font_sizes() -> None:
    """Check that headings are still recovered from a PDF lacking usable font sizes."""
    # The fixture lives next to this test module. It is Einstein's special relativity paper,
    # and every font size parsed from it is equal to 1.
    fixtures_dir = Path(__file__).parent
    pdf_path = fixtures_dir / "specrel.pdf"
    markdown = document_to_markdown(pdf_path)
    # Despite the missing font size data, the converted document must open with the title as a
    # level-1 heading, the author line as a level-2 heading, and then the body text.
    expected_start = """
# ON THE ELECTRODYNAMICS OF MOVING BODIES

## By A. EINSTEIN  June 30, 1905

It is known that Maxwell
    """.strip()
    assert markdown.startswith(expected_start)