Spaces:

marcosremar2
/

docker_mineru

Sleeping

marcosremar2 committed on May 3

Commit

41ee299

1 Parent(s): a49c5dc

Update PDF to Markdown converter API with NVIDIA L4 support

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -59,12 +59,18 @@ COPY --chown=user:user . .
 RUN mkdir -p /app/docker_mineru/output/images && \
     chown -R user:user /app/docker_mineru/output
 # Set the user
 USER user
 # Environment variables for caching (optional, might help with model downloads)
 ENV HF_HOME=/home/user/.cache/huggingface
 ENV TORCH_HOME=/home/user/.cache/torch
 # Expose the port
 EXPOSE 7860

 RUN mkdir -p /app/docker_mineru/output/images && \
     chown -R user:user /app/docker_mineru/output
+# Create marker static directory and set proper permissions (fix for font download error)
+RUN mkdir -p /usr/local/lib/python3.10/dist-packages/static && \
+    chmod -R 777 /usr/local/lib/python3.10/dist-packages/static
 # Set the user
 USER user
 # Environment variables for caching (optional, might help with model downloads)
 ENV HF_HOME=/home/user/.cache/huggingface
 ENV TORCH_HOME=/home/user/.cache/torch
+# Add environment variable for marker font path (alternative fix)
+ENV MARKER_FONT_PATH=/home/user/.cache/marker_fonts
 # Expose the port
 EXPOSE 7860

pdf_converter/convert_pdf_to_md.py CHANGED Viewed

@@ -13,6 +13,21 @@ def initialize_converter():
     if _converter is None:
         print("Initializing marker models...")
         try:
             # Create configuration, explicitly setting output format
             # Potential optimization: Check if batch_multiplier or similar exists
             config_parser = ConfigParser({'output_format': 'markdown'}) # Add batch_multiplier here if applicable

     if _converter is None:
         print("Initializing marker models...")
         try:
+            # Set custom font path from environment variable if available
+            font_path = os.environ.get('MARKER_FONT_PATH')
+            if font_path:
+                try:
+                    # Import marker settings and override font path
+                    from marker import settings
+                    os.makedirs(font_path, exist_ok=True)
+                    custom_font_path = os.path.join(font_path, 'NotoSans-Regular.ttf')
+                    settings.FONT_PATH = custom_font_path
+                    print(f"Using custom font path: {custom_font_path}")
+                except ImportError:
+                    print("Could not import marker settings, using default font path")
+                except Exception as e:
+                    print(f"Error setting custom font path: {e}", file=sys.stderr)
             # Create configuration, explicitly setting output format
             # Potential optimization: Check if batch_multiplier or similar exists
             config_parser = ConfigParser({'output_format': 'markdown'}) # Add batch_multiplier here if applicable