switch to Docker space
Browse files
- Dockerfile +13 -0
- README.md +2 -5
- app.py +1 -19
- packages.txt +0 -2
- requirements.txt +0 -9
Dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10
|
2 |
+
|
3 |
+
ARG PIP_NO_CACHE_DIR=1
|
4 |
+
|
5 |
+
# Install the Tesseract OCR engine used by pytesseract.
# - `apt-get update` is required first: the base image ships with empty package
#   lists, so the package cannot be resolved without it.
# - `-y` answers the confirmation prompt; a Docker build has no stdin.
# - Update, install and list cleanup share one layer so a stale cached index is
#   never reused and the lists do not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
|
6 |
+
RUN wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb \
|
7 |
+
&& dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb
|
8 |
+
|
9 |
+
RUN pip install torch==2.1.1 -i https://download.pytorch.org/whl/cpu
|
10 |
+
RUN pip install paddlepaddle==2.5.1 -i https://mirror.baidu.com/pypi/simple
|
11 |
+
RUN pip install transformers pytesseract paddleocr gradio Pillow
|
12 |
+
|
13 |
+
# Copy the application into the image; nothing from the repository is present in
# the container unless it is copied explicitly. Done last so that source changes
# do not invalidate the cached dependency layers above.
WORKDIR /app
COPY . /app

# Launch the app from /app, where app.py now lives.
CMD ["python", "app.py"]
|
README.md
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
---
|
2 |
title: Layoutlm Docvqa Paddleocr
|
3 |
-
emoji:
|
4 |
colorFrom: indigo
|
5 |
colorTo: yellow
|
6 |
-
sdk:
|
7 |
-
sdk_version: 4.8.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: Layoutlm Docvqa Paddleocr
|
3 |
+
emoji: π
|
4 |
colorFrom: indigo
|
5 |
colorTo: yellow
|
6 |
+
sdk: docker
|
|
|
|
|
|
|
7 |
---
|
8 |
|
9 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,19 +1,3 @@
|
|
1 |
-
import sys
|
2 |
-
|
3 |
-
if sys.platform == "linux":
|
4 |
-
try:
|
5 |
-
import paddle
|
6 |
-
|
7 |
-
except ImportError:
|
8 |
-
import os
|
9 |
-
|
10 |
-
# install libssl1.1 on HF spaces
|
11 |
-
os.system(
|
12 |
-
"wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb"
|
13 |
-
)
|
14 |
-
os.system("dpkg -x libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb .")
|
15 |
-
os.environ["LD_LIBRARY_PATH"] = os.environ.get("LD_LIBRARY_PATH", "") + ":./usr/lib/x86_64-linux-gnu"
|
16 |
-
|
17 |
import cv2
|
18 |
import gradio as gr
|
19 |
import numpy as np
|
@@ -45,9 +29,7 @@ def predict(image: Image.Image, question: str, ocr_engine: str):
|
|
45 |
boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
|
46 |
|
47 |
for box in boxes:
|
48 |
-
cv2.polylines(
|
49 |
-
image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3
|
50 |
-
)
|
51 |
|
52 |
x1 = boxes[:, :, 0].min(1) * 1000 / image.width
|
53 |
y1 = boxes[:, :, 1].min(1) * 1000 / image.height
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import cv2
|
2 |
import gradio as gr
|
3 |
import numpy as np
|
|
|
29 |
boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
|
30 |
|
31 |
for box in boxes:
|
32 |
+
cv2.polylines(image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3)
|
|
|
|
|
33 |
|
34 |
x1 = boxes[:, :, 0].min(1) * 1000 / image.width
|
35 |
y1 = boxes[:, :, 1].min(1) * 1000 / image.height
|
packages.txt
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
sudo
|
2 |
-
tesseract-ocr
|
|
|
|
|
|
requirements.txt
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
--index-url https://download.pytorch.org/whl/cpu
|
2 |
-
--extra-index-url https://mirror.baidu.com/pypi/simple
|
3 |
-
numpy
|
4 |
-
torch
|
5 |
-
transformers
|
6 |
-
paddlepaddle==2.5.1
|
7 |
-
paddleocr
|
8 |
-
opencv-python-headless
|
9 |
-
pytesseract
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|