marcosremar2 commited on
Commit
44df236
·
1 Parent(s): b8fca79

Initial deployment

Browse files
Files changed (4) hide show
  1. Dockerfile +49 -0
  2. README.md +4 -9
  3. app.py +15 -0
  4. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+
5
+ RUN apt-get update && \
6
+ apt-get install -y software-properties-common && \
7
+ add-apt-repository ppa:deadsnakes/ppa && \
8
+ apt-get update && \
9
+ apt-get install -y \
10
+ python3.10 \
11
+ python3.10-venv \
12
+ python3.10-distutils \
13
+ python3-pip \
14
+ wget \
15
+ git \
16
+ libgl1 \
17
+ libreoffice \
18
+ fonts-noto-cjk \
19
+ fonts-wqy-zenhei \
20
+ fonts-wqy-microhei \
21
+ ttf-mscorefonts-installer \
22
+ fontconfig \
23
+ libglib2.0-0 \
24
+ libxrender1 \
25
+ libsm6 \
26
+ libxext6 \
27
+ poppler-utils && \
28
+ rm -rf /var/lib/apt/lists/*
29
+
30
+ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
31
+
32
+ WORKDIR /app
33
+
34
+ COPY requirements.txt .
35
+
36
+ RUN python3 -m venv /opt/mineru_venv && \
37
+ source /opt/mineru_venv/bin/activate && \
38
+ pip install --upgrade pip && \
39
+ pip install -r requirements.txt
40
+
41
+ # Download model + setup config
42
+ RUN wget https://github.com/opendatalab/MinerU/raw/master/scripts/download_models_hf.py -O download_models.py && \
43
+ python3 download_models.py && \
44
+ wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json -O /root/magic-pdf.json && \
45
+ sed -i 's|"device": "cpu"|"device": "cpu"|g' /root/magic-pdf.json
46
+
47
+ COPY app.py .
48
+
49
+ CMD ["/opt/mineru_venv/bin/uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,5 @@
1
- ---
2
- title: Docker Mineru
3
- emoji: 🏆
4
- colorFrom: green
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
+ # MinerU PDF Extractor (Docker Space)
 
 
 
 
 
 
 
2
 
3
+ This Hugging Face Space uses `magic-pdf` to extract structured content from PDFs using FastAPI.
4
+
5
+ Send a `POST` request to `/extract` with a PDF file to receive extracted results.
app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File
2
+ from fastapi.responses import JSONResponse
3
+ from magic_pdf import MinerU
4
+
5
+ app = FastAPI()
6
+ model = MinerU(config_path="/root/magic-pdf.json")
7
+
8
+ @app.post("/extract")
9
+ async def extract(file: UploadFile = File(...)):
10
+ content = await file.read()
11
+ try:
12
+ result = model.extract(content)
13
+ return {"result": result}
14
+ except Exception as e:
15
+ return JSONResponse(status_code=500, content={"error": str(e)})
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ magic-pdf[full]