File size: 1,667 Bytes
f25ee15
 
 
 
 
683fa93
 
e219826
0532015
f25ee15
0532015
 
 
 
 
f25ee15
0532015
 
f25ee15
 
 
 
 
 
 
 
 
 
 
 
0532015
f25ee15
 
0532015
f25ee15
e219826
f25ee15
e219826
f25ee15
e219826
683fa93
e219826
 
0532015
 
e219826
f25ee15
0532015
e219826
f25ee15
 
0532015
683fa93
 
0532015
683fa93
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import shutil
import subprocess
import tempfile
import json
import gradio as gr

def process_upload(pdf_file, output_format):
    """Convert an uploaded PDF to Markdown or JSON text using ``marker_single``.

    Args:
        pdf_file: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
        output_format: ``"markdown"`` selects Markdown output; any other
            value selects JSON.

    Returns:
        For Markdown, the contents of every generated ``.md`` file joined
        with ``---`` separators. Otherwise a JSON document of the form
        ``{"pages": [...]}`` whose entries are the raw text of each
        generated ``.json`` file.

    Raises:
        ValueError: If no file was supplied.
        subprocess.CalledProcessError: If ``marker_single`` exits non-zero.
    """
    if pdf_file is None:
        raise ValueError("No PDF file was uploaded.")

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    is_markdown = output_format == "markdown"
    fmt = "markdown" if is_markdown else "json"
    suffix = ".md" if is_markdown else ".json"

    pages = []
    # The context manager removes the scratch directory even when the
    # conversion or a read fails, so failed runs do not leak temp dirs.
    with tempfile.TemporaryDirectory() as out_dir:
        cmd = [
            "marker_single",
            pdf_path,
            "--output_format", fmt,
            "--output_dir", out_dir,
            "--paginate_output",
        ]
        subprocess.run(cmd, check=True)

        # Recursively collect only the files matching the requested format.
        for root, dirs, files in os.walk(out_dir):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            for fname in sorted(files):
                if fname.lower().endswith(suffix):
                    path = os.path.join(root, fname)
                    with open(path, "r", encoding="utf-8") as f:
                        pages.append(f.read())

    if is_markdown:
        return "\n\n---\n\n".join(pages)
    return json.dumps({"pages": pages}, indent=2, ensure_ascii=False)

# Web UI: a PDF upload plus a format selector feed process_upload, and the
# converted text is shown in a code viewer.
demo = gr.Interface(
    title="PDF → Markdown/JSON with LaTeX Support",
    description=(
        "Upload a PDF and get back Markdown or structured JSON, "
        "with math preserved as LaTeX."
    ),
    fn=process_upload,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF"),
        gr.Radio(
            ["markdown", "json"],
            label="Output format",
            value="markdown",
        ),
    ],
    outputs=gr.Code(label="Converted Output"),
)

if __name__ == "__main__":
    # Listen on every network interface at port 7860 when run as a script.
    demo.launch(server_port=7860, server_name="0.0.0.0")