Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,8 +16,8 @@ def run_marker(pdf_path, out_dir, fmt):
|
|
16 |
pdf_path,
|
17 |
"--output_format", fmt,
|
18 |
"--output_dir", out_dir,
|
19 |
-
"--extract_images",
|
20 |
-
"--paginate_output"
|
21 |
]
|
22 |
subprocess.run(cmd, check=True)
|
23 |
|
@@ -38,7 +38,7 @@ def process_upload(pdf_file, output_format):
|
|
38 |
fmt = {
|
39 |
"markdown": "markdown",
|
40 |
"json": "json",
|
41 |
-
"docx": "markdown" # produce .md before converting to DOCX
|
42 |
}[output_format]
|
43 |
|
44 |
# 3) Run Marker CLI
|
@@ -70,11 +70,14 @@ def process_upload(pdf_file, output_format):
|
|
70 |
return docx_path
|
71 |
|
72 |
# 6) Non-DOCX: clean up and return Markdown or JSON string
|
73 |
-
|
74 |
if output_format == "markdown":
|
75 |
-
|
76 |
else:
|
77 |
-
|
|
|
|
|
|
|
78 |
|
79 |
# Gradio Interface
|
80 |
demo = gr.Interface(
|
@@ -97,4 +100,4 @@ demo = gr.Interface(
|
|
97 |
)
|
98 |
|
99 |
if __name__ == "__main__":
|
100 |
-
demo.launch(server_name="0.0.0.0", server_port=7860
|
|
|
16 |
pdf_path,
|
17 |
"--output_format", fmt,
|
18 |
"--output_dir", out_dir,
|
19 |
+
"--extract_images", # standalone flag
|
20 |
+
"--paginate_output" # standalone flag
|
21 |
]
|
22 |
subprocess.run(cmd, check=True)
|
23 |
|
|
|
38 |
fmt = {
|
39 |
"markdown": "markdown",
|
40 |
"json": "json",
|
41 |
+
"docx": "markdown" # still produce .md before converting to DOCX
|
42 |
}[output_format]
|
43 |
|
44 |
# 3) Run Marker CLI
|
|
|
70 |
return docx_path
|
71 |
|
72 |
# 6) Non-DOCX: clean up and return Markdown or JSON string
|
73 |
+
result = None
|
74 |
if output_format == "markdown":
|
75 |
+
result = "\n\n---\n\n".join(pages)
|
76 |
else:
|
77 |
+
result = json.dumps({"pages": pages}, indent=2, ensure_ascii=False)
|
78 |
+
|
79 |
+
shutil.rmtree(out_dir)
|
80 |
+
return result
|
81 |
|
82 |
# Gradio Interface
|
83 |
demo = gr.Interface(
|
|
|
100 |
)
|
101 |
|
102 |
if __name__ == "__main__":
|
103 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|