Spaces:
Sleeping
Sleeping
Commit · 41ee299
1 Parent(s): a49c5dc
Update PDF to Markdown converter API with NVIDIA L4 support
Browse files
- Dockerfile +6 -0
- pdf_converter/convert_pdf_to_md.py +15 -0
Dockerfile
CHANGED
@@ -59,12 +59,18 @@ COPY --chown=user:user . .
|
|
59 |
RUN mkdir -p /app/docker_mineru/output/images && \
|
60 |
chown -R user:user /app/docker_mineru/output
|
61 |
|
|
|
|
|
|
|
|
|
62 |
# Set the user
|
63 |
USER user
|
64 |
|
65 |
# Environment variables for caching (optional, might help with model downloads)
|
66 |
ENV HF_HOME=/home/user/.cache/huggingface
|
67 |
ENV TORCH_HOME=/home/user/.cache/torch
|
|
|
|
|
68 |
|
69 |
# Expose the port
|
70 |
EXPOSE 7860
|
|
|
59 |
RUN mkdir -p /app/docker_mineru/output/images && \
|
60 |
chown -R user:user /app/docker_mineru/output
|
61 |
|
62 |
+
# Create marker static directory and set proper permissions (fix for font download error)
|
63 |
+
RUN mkdir -p /usr/local/lib/python3.10/dist-packages/static && \
|
64 |
+
chmod -R 777 /usr/local/lib/python3.10/dist-packages/static
|
65 |
+
|
66 |
# Set the user
|
67 |
USER user
|
68 |
|
69 |
# Environment variables for caching (optional, might help with model downloads)
|
70 |
ENV HF_HOME=/home/user/.cache/huggingface
|
71 |
ENV TORCH_HOME=/home/user/.cache/torch
|
72 |
+
# Add environment variable for marker font path (alternative fix)
|
73 |
+
ENV MARKER_FONT_PATH=/home/user/.cache/marker_fonts
|
74 |
|
75 |
# Expose the port
|
76 |
EXPOSE 7860
|
pdf_converter/convert_pdf_to_md.py
CHANGED
@@ -13,6 +13,21 @@ def initialize_converter():
|
|
13 |
if _converter is None:
|
14 |
print("Initializing marker models...")
|
15 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# Create configuration, explicitly setting output format
|
17 |
# Potential optimization: Check if batch_multiplier or similar exists
|
18 |
config_parser = ConfigParser({'output_format': 'markdown'}) # Add batch_multiplier here if applicable
|
|
|
13 |
if _converter is None:
|
14 |
print("Initializing marker models...")
|
15 |
try:
|
16 |
+
# Set custom font path from environment variable if available
|
17 |
+
font_path = os.environ.get('MARKER_FONT_PATH')
|
18 |
+
if font_path:
|
19 |
+
try:
|
20 |
+
# Import marker settings and override font path
|
21 |
+
from marker import settings
|
22 |
+
os.makedirs(font_path, exist_ok=True)
|
23 |
+
custom_font_path = os.path.join(font_path, 'NotoSans-Regular.ttf')
|
24 |
+
settings.FONT_PATH = custom_font_path
|
25 |
+
print(f"Using custom font path: {custom_font_path}")
|
26 |
+
except ImportError:
|
27 |
+
print("Could not import marker settings, using default font path")
|
28 |
+
except Exception as e:
|
29 |
+
print(f"Error setting custom font path: {e}", file=sys.stderr)
|
30 |
+
|
31 |
# Create configuration, explicitly setting output format
|
32 |
# Potential optimization: Check if batch_multiplier or similar exists
|
33 |
config_parser = ConfigParser({'output_format': 'markdown'}) # Add batch_multiplier here if applicable
|