Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ from huggingface_hub import hf_hub_download
|
|
4 |
from transformers import AutoImageProcessor, TableTransformerForObjectDetection
|
5 |
import torch
|
6 |
from PIL import Image
|
|
|
7 |
|
8 |
# Model and Image Processor Loading (ideally at the app start)
|
9 |
@st.cache_resource
|
@@ -15,17 +16,25 @@ def load_assets():
|
|
15 |
|
16 |
file_path, image_processor, model = load_assets()
|
17 |
|
|
|
18 |
# App Title
|
19 |
-
st.title(" Detection in
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
|
22 |
-
|
|
|
|
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
-
|
27 |
-
inputs = image_processor(images=image, return_tensors="pt")
|
28 |
-
outputs = model(**inputs)
|
29 |
|
30 |
target_sizes = torch.tensor([image.size[::-1]])
|
31 |
results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[0]
|
|
|
4 |
from transformers import AutoImageProcessor, TableTransformerForObjectDetection
|
5 |
import torch
|
6 |
from PIL import Image
|
7 |
+
import fitz # Import PyMuPDF (fitz)
|
8 |
|
9 |
# Model and Image Processor Loading (ideally at the app start)
|
10 |
@st.cache_resource
|
|
|
16 |
|
17 |
file_path, image_processor, model = load_assets()
|
18 |
|
19 |
+
|
20 |
# App Title
|
21 |
+
st.title("Table Detection in Documents")
|
22 |
+
|
23 |
+
# Document Upload
|
24 |
+
uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "doc"]) # Add more formats if needed
|
25 |
+
|
26 |
+
# Process Document and Display Results
|
27 |
+
if uploaded_file:
|
28 |
+
doc = fitz.open(stream=uploaded_file.getvalue(), filetype="pdf") # Open as PDF
|
29 |
|
30 |
+
for page_index in range(len(doc)):
|
31 |
+
page = doc.load_page(page_index)
|
32 |
+
pix = page.get_pixmap()
|
33 |
+
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
34 |
|
35 |
+
# Table Detection (your existing logic)
|
36 |
+
inputs = image_processor(images=image, return_tensors="pt")
|
37 |
+
outputs = model(**inputs)
|
|
|
|
|
38 |
|
39 |
target_sizes = torch.tensor([image.size[::-1]])
|
40 |
results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[0]
|