Spaces:
Sleeping
Sleeping
Commit
·
a3cafa2
1
Parent(s):
671205b
Update PDF to Markdown converter API with NVIDIA L4 support
Browse files
pdf_converter/convert_pdf_to_md.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import marker
|
2 |
import os
|
3 |
import sys
|
|
|
|
|
4 |
|
5 |
def convert_pdf(pdf_input_path, output_md_path=None):
|
6 |
"""
|
@@ -20,11 +22,27 @@ def convert_pdf(pdf_input_path, output_md_path=None):
|
|
20 |
print(f"Starting conversion of '{pdf_input_path}'...")
|
21 |
|
22 |
try:
|
23 |
-
#
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
# Convert the PDF to markdown using marker
|
27 |
-
|
|
|
|
|
|
|
28 |
|
29 |
# If output path is provided, save the markdown
|
30 |
if output_md_path:
|
@@ -40,4 +58,5 @@ def convert_pdf(pdf_input_path, output_md_path=None):
|
|
40 |
|
41 |
except Exception as e:
|
42 |
print(f"An error occurred during conversion: {e}", file=sys.stderr)
|
|
|
43 |
raise
|
|
|
1 |
import marker
|
2 |
import os
|
3 |
import sys
|
4 |
+
from marker.config.parser import ConfigParser
|
5 |
+
from marker.models import create_model_dict
|
6 |
|
7 |
def convert_pdf(pdf_input_path, output_md_path=None):
|
8 |
"""
|
|
|
22 |
print(f"Starting conversion of '{pdf_input_path}'...")
|
23 |
|
24 |
try:
|
25 |
+
# Create configuration
|
26 |
+
config_parser = ConfigParser({})
|
27 |
+
|
28 |
+
# Load models
|
29 |
+
models = create_model_dict()
|
30 |
+
|
31 |
+
# Get converter class and create converter
|
32 |
+
converter_cls = config_parser.get_converter_cls()
|
33 |
+
converter = converter_cls(
|
34 |
+
config=config_parser.generate_config_dict(),
|
35 |
+
artifact_dict=models,
|
36 |
+
processor_list=config_parser.get_processors(),
|
37 |
+
renderer=config_parser.get_renderer(),
|
38 |
+
llm_service=config_parser.get_llm_service()
|
39 |
+
)
|
40 |
|
41 |
# Convert the PDF to markdown using marker
|
42 |
+
result = converter(pdf_input_path)
|
43 |
+
|
44 |
+
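# NOTE(review): marker converters appear to return a rendered output object (pydantic model exposing `.markdown`), not a plain dict — confirm `.get('markdown', '')` is valid here, or read `result.markdown` / use `marker.output.text_from_rendered(result)`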
|
45 |
+
markdown_text = result.get('markdown', '')
|
46 |
|
47 |
# If output path is provided, save the markdown
|
48 |
if output_md_path:
|
|
|
58 |
|
59 |
except Exception as e:
|
60 |
print(f"An error occurred during conversion: {e}", file=sys.stderr)
|
61 |
+
print(f"Error details: {str(type(e))}", file=sys.stderr)
|
62 |
raise
|