marcosremar2 committed on
Commit
ed4cfc9
·
1 Parent(s): 41ee299

Update PDF to Markdown converter API with NVIDIA L40S support

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -2
  2. pdf_converter/convert_pdf_to_md.py +7 -4
Dockerfile CHANGED
@@ -76,5 +76,5 @@ ENV MARKER_FONT_PATH=/home/user/.cache/marker_fonts
76
  EXPOSE 7860
77
 
78
  # Command to run the application with Gunicorn and Uvicorn workers
79
- # Start with 4 workers. Adjust based on monitoring L40S resources.
80
- CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "app.main:app", "--bind", "0.0.0.0:7860"]
 
76
  EXPOSE 7860
77
 
78
  # Command to run the application with Gunicorn and Uvicorn workers
79
+ # Increased workers to 16 for L40S. Adjust based on monitoring.
80
+ CMD ["gunicorn", "-w", "16", "-k", "uvicorn.workers.UvicornWorker", "app.main:app", "--bind", "0.0.0.0:7860"]
pdf_converter/convert_pdf_to_md.py CHANGED
@@ -28,9 +28,12 @@ def initialize_converter():
28
  except Exception as e:
29
  print(f"Error setting custom font path: {e}", file=sys.stderr)
30
 
31
- # Create configuration, explicitly setting output format
32
- # Potential optimization: Check if batch_multiplier or similar exists
33
- config_parser = ConfigParser({'output_format': 'markdown'}) # Add batch_multiplier here if applicable
 
 
 
34
 
35
  # Load models
36
  # Potential optimization: Check if device mapping/multi-GPU is possible
@@ -45,7 +48,7 @@ def initialize_converter():
45
  renderer=config_parser.get_renderer(),
46
  llm_service=config_parser.get_llm_service()
47
  )
48
- print("Marker models initialized successfully.")
49
  except Exception as e:
50
  print(f"Failed to initialize marker models: {e}", file=sys.stderr)
51
  _converter = None # Ensure it's None if init fails
 
28
  except Exception as e:
29
  print(f"Error setting custom font path: {e}", file=sys.stderr)
30
 
31
+ # Create configuration, explicitly setting output format and batch multiplier
32
+ # Increased batch_multiplier for potentially faster processing on L40S
33
+ config_parser = ConfigParser({
34
+ 'output_format': 'markdown',
35
+ 'batch_multiplier': 4 # Increased from default 2
36
+ })
37
 
38
  # Load models
39
  # Potential optimization: Check if device mapping/multi-GPU is possible
 
48
  renderer=config_parser.get_renderer(),
49
  llm_service=config_parser.get_llm_service()
50
  )
51
+ print("Marker models initialized successfully with batch_multiplier=4.")
52
  except Exception as e:
53
  print(f"Failed to initialize marker models: {e}", file=sys.stderr)
54
  _converter = None # Ensure it's None if init fails