Spaces:

gaunernst
/

layoutlm-docvqa-paddleocr

Running

gaunernst committed on Dec 6, 2023

Commit

71a9e68

1 Parent(s): 64e942f

switch to Docker space

Files changed (5) hide show

Dockerfile ADDED Viewed

+# Image for the LayoutLM DocVQA demo Space (CPU-only torch + PaddleOCR + Tesseract).
+FROM python:3.10
+# Build-time only: keeps pip from storing a download cache in the image layers.
+ARG PIP_NO_CACHE_DIR=1
+# The base image ships without apt package lists and the build is
+# non-interactive, so refresh the index and confirm with -y; drop the lists
+# afterwards to keep the layer small.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends tesseract-ocr \
+    && rm -rf /var/lib/apt/lists/*
+# libssl1.1 is not packaged for the base distribution; app.py previously
+# fetched it at runtime before importing paddle. Install it once here and
+# remove the downloaded archive.
+RUN wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb \
+    && dpkg -i libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb \
+    && rm libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb
+RUN pip install torch==2.1.1 -i https://download.pytorch.org/whl/cpu
+RUN pip install paddlepaddle==2.5.1 -i https://mirror.baidu.com/pypi/simple
+RUN pip install transformers pytesseract paddleocr gradio Pillow
+# Docker Spaces run the container as UID 1000; create that user with a
+# writable home so model/cache downloads at runtime do not hit permission errors.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user
+# Make Gradio listen on all interfaces so the Space proxy can reach it.
+# NOTE(review): harmless if app.py already passes server_name to launch() - confirm.
+ENV GRADIO_SERVER_NAME=0.0.0.0
+WORKDIR /home/user/app
+# Without this COPY the image contains no app.py for CMD to run.
+COPY --chown=user . /home/user/app
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,12 +1,9 @@
 ---
 title: Layoutlm Docvqa Paddleocr
-emoji: 🏃
 colorFrom: indigo
 colorTo: yellow
-sdk: gradio
-sdk_version: 4.8.0
-app_file: app.py
-pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Layoutlm Docvqa Paddleocr
+emoji: 📄
 colorFrom: indigo
 colorTo: yellow
+sdk: docker
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,19 +1,3 @@
-import sys
-if sys.platform == "linux":
-    try:
-        import paddle
-    except ImportError:
-        import os
-        # install libssl1.1 on HF spaces
-        os.system(
-            "wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb"
-        )
-        os.system("dpkg -x libssl1.1_1.1.1f-1ubuntu2.20_amd64.deb .")
-        os.environ["LD_LIBRARY_PATH"] = os.environ.get("LD_LIBRARY_PATH", "") + ":./usr/lib/x86_64-linux-gnu"
 import cv2
 import gradio as gr
 import numpy as np
@@ -45,9 +29,7 @@ def predict(image: Image.Image, question: str, ocr_engine: str):
         boxes = np.asarray([x[0] for x in ocr_result])  # (n_boxes, 4, 2)
         for box in boxes:
-            cv2.polylines(
-                image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3
-            )
         x1 = boxes[:, :, 0].min(1) * 1000 / image.width
         y1 = boxes[:, :, 1].min(1) * 1000 / image.height

 import cv2
 import gradio as gr
 import numpy as np
         boxes = np.asarray([x[0] for x in ocr_result])  # (n_boxes, 4, 2)
         for box in boxes:
+            cv2.polylines(image_np, [box.reshape(-1, 1, 2).astype(int)], True, (0, 255, 255), 3)
         x1 = boxes[:, :, 0].min(1) * 1000 / image.width
         y1 = boxes[:, :, 1].min(1) * 1000 / image.height

packages.txt DELETED Viewed

requirements.txt DELETED Viewed

@@ -1,9 +0,0 @@
---index-url https://download.pytorch.org/whl/cpu
---extra-index-url https://mirror.baidu.com/pypi/simple
-numpy
-torch
-transformers
-paddlepaddle==2.5.1
-paddleocr
-opencv-python-headless
-pytesseract