from typing import Tuple, List, Sequence, Optional, Union
from torchvision import transforms
from torch import nn, Tensor
from PIL import Image
from pathlib import Path
from bs4 import BeautifulSoup as bs
import numpy as np
import numpy.typing as npt
from numpy import uint8
import torch
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Patch
from transformers import AutoModelForObjectDetection
from ultralyticsplus import YOLO, render_result
from utils import draw_only_box
from unitable import UnitableFullPredictor

# Type alias for images stored as uint8 numpy arrays
ImageType = npt.NDArray[uint8]
""" | |
USES YOLO FOR DETECITON INSTEAD OF TABLE TRANSFORMER | |
Table TransFORMER | |
""" | |
class DetectionAndOcrTable4:
    # This component takes an entire PDF page image as input, scans it for tables,
    # and returns each detected table in HTML format.
    # Uses the full unitable model - different to DetectionAndOcrTable1.
    def __init__(self):
        self.unitableFullPredictor = UnitableFullPredictor()
        self.detector = YOLO('foduucom/table-detection-and-extraction')
        # Set detector parameters
        self.detector.overrides['conf'] = 0.25           # NMS confidence threshold
        self.detector.overrides['iou'] = 0.45            # NMS IoU threshold
        self.detector.overrides['agnostic_nms'] = False  # NMS class-agnostic
        self.detector.overrides['max_det'] = 1000        # maximum number of detections per image

    @staticmethod
    def save_detection(detected_lines_images: List[ImageType], prefix='./res/test1/res_'):
        # Save each detected crop as a numbered PNG, e.g. <prefix>0.png, <prefix>1.png, ...
        for i, img in enumerate(detected_lines_images):
            pilimg = Image.fromarray(img)
            pilimg.save(prefix + str(i) + '.png')
""" | |
Valid 'Boxes' object attributes and properties are: | |
Attributes: | |
boxes (torch.Tensor) or (numpy.ndarray): A tensor or numpy array containing the detection boxes, | |
with shape (num_boxes, 6). | |
orig_shape (torch.Tensor) or (numpy.ndarray): Original image size, in the format (height, width). | |
Properties: | |
xyxy (torch.Tensor) or (numpy.ndarray): The boxes in xyxy format. | |
conf (torch.Tensor) or (numpy.ndarray): The confidence values of the boxes. | |
cls (torch.Tensor) or (numpy.ndarray): The class values of the boxes. | |
xywh (torch.Tensor) or (numpy.ndarray): The boxes in xywh format. | |
xyxyn (torch.Tensor) or (numpy.ndarray): The boxes in xyxy format normalized by original image size. | |
xywhn (torch.Tensor) or (numpy.ndarray): The boxes in xywh format normalized by original image size. | |
""" | |
    # `image` is the full page image
    def predict(self, image: Image.Image, debugfolder_filename_page_name=None):
        results = self.detector.predict(image)

        # Detected bounding boxes (xyxy format) and their confidences
        bbxs = results[0].boxes.xyxy.int().tolist()
        conf = results[0].boxes.conf.float().tolist()
        print(bbxs)
        print(conf)

        # Draw the detections on the page for debugging
        img_to_save = draw_only_box(image, bbxs)
        img_to_save.save(debugfolder_filename_page_name + "detectionBoxRes.png", quality=95)

        cropped_tables = []
        for i in range(len(bbxs)):
            # TODO: find the right confidence and padding values
            if conf[i] < 0.65:
                continue
            # Pad each box by 10 px on every side before cropping
            padded = [bbxs[i][0] - 10, bbxs[i][1] - 10, bbxs[i][2] + 10, bbxs[i][3] + 10]
            cropped_table = image.convert("RGB").crop(padded)
            cropped_table.save(debugfolder_filename_page_name + "yolo_cropped_table_" + str(i) + ".png")
            cropped_tables.append(cropped_table)

        print("number of cropped tables found: " + str(len(cropped_tables)))

        # Step 1: Unitable - takes a list of PIL Images as input
        if cropped_tables:
            table_codes = self.unitableFullPredictor.predict(cropped_tables, debugfolder_filename_page_name)
            return table_codes
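

# Minimal usage sketch (not part of the original module): it shows how the component
# might be driven end to end. The input path "page.png" and the debug prefix
# "./res/test1/page_" are hypothetical, and the exact structure of the returned
# table codes depends on UnitableFullPredictor.
if __name__ == "__main__":
    page = Image.open("page.png")  # hypothetical page image
    component = DetectionAndOcrTable4()
    table_codes = component.predict(page, debugfolder_filename_page_name="./res/test1/page_")
    if table_codes:
        for idx, html in enumerate(table_codes):
            print("table " + str(idx) + ":")
            print(html)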