marcosremar2 committed on
Commit
a3cafa2
·
1 Parent(s): 671205b

Update PDF to Markdown converter API with NVIDIA L4 support

Browse files
Files changed (1) hide show
  1. pdf_converter/convert_pdf_to_md.py +22 -3
pdf_converter/convert_pdf_to_md.py CHANGED
@@ -1,6 +1,8 @@
1
  import marker
2
  import os
3
  import sys
 
 
4
 
5
  def convert_pdf(pdf_input_path, output_md_path=None):
6
  """
@@ -20,11 +22,27 @@ def convert_pdf(pdf_input_path, output_md_path=None):
20
  print(f"Starting conversion of '{pdf_input_path}'...")
21
 
22
  try:
23
- # Use marker.Marker() for newer versions of marker-pdf
24
- md_converter = marker.Marker()
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # Convert the PDF to markdown using marker
27
- markdown_text = md_converter.convert_file(pdf_input_path)
 
 
 
28
 
29
  # If output path is provided, save the markdown
30
  if output_md_path:
@@ -40,4 +58,5 @@ def convert_pdf(pdf_input_path, output_md_path=None):
40
 
41
  except Exception as e:
42
  print(f"An error occurred during conversion: {e}", file=sys.stderr)
 
43
  raise
 
1
  import marker
2
  import os
3
  import sys
4
+ from marker.config.parser import ConfigParser
5
+ from marker.models import create_model_dict
6
 
7
  def convert_pdf(pdf_input_path, output_md_path=None):
8
  """
 
22
  print(f"Starting conversion of '{pdf_input_path}'...")
23
 
24
  try:
25
+ # Create configuration
26
+ config_parser = ConfigParser({})
27
+
28
+ # Load models
29
+ models = create_model_dict()
30
+
31
+ # Get converter class and create converter
32
+ converter_cls = config_parser.get_converter_cls()
33
+ converter = converter_cls(
34
+ config=config_parser.generate_config_dict(),
35
+ artifact_dict=models,
36
+ processor_list=config_parser.get_processors(),
37
+ renderer=config_parser.get_renderer(),
38
+ llm_service=config_parser.get_llm_service()
39
+ )
40
 
41
  # Convert the PDF to markdown using marker
42
+ result = converter(pdf_input_path)
43
+
44
+ # The converter returns a dictionary with the markdown content
45
+ markdown_text = result.get('markdown', '')
46
 
47
  # If output path is provided, save the markdown
48
  if output_md_path:
 
58
 
59
  except Exception as e:
60
  print(f"An error occurred during conversion: {e}", file=sys.stderr)
61
+ print(f"Error details: {str(type(e))}", file=sys.stderr)
62
  raise