"""Gradio demo that OCRs a large image tile by tile and filters the words found.

The uploaded image is cut into fixed-size tiles, each tile is run through a
docTR OCR predictor, the word boxes are mapped back to full-image pixel
coordinates, and the words matching a regular expression are returned as a
table together with a crop of each word.
"""

import ast
import functools
import math
import os
import re

import gradio as gr
import numpy as np
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from PIL import Image, ImageDraw

# Working directories (relative to the current working directory):
# ``img_temp`` receives the page tiles, ``sub_img_temp`` the per-word crops.
img_temp = "tp"
sub_img_temp = "tp1"

# Patterns applied to the recognised values.  Values keep the single leading
# space produced by the "{}: {}" record format, which is why the first pattern
# starts with ``\s``.  The group is non-capturing so that pandas'
# ``str.contains`` does not warn about match groups; matching is unchanged.
_NUMBER_PATTERN = re.compile(r"^\s\b\d+(?:[\.,]\d+)?")
_DIGITS_PATTERN = re.compile(r"\d+")


@functools.lru_cache(maxsize=1)
def load_model():
    """Return the docTR OCR predictor used by the app.

    The predictor is built once and cached, so repeated requests do not
    reload the pretrained weights.
    """
    return ocr_predictor(
        det_arch='linknet_resnet18_rotation',
        reco_arch='crnn_vgg16_bn',
        detect_orientation=True,
        assume_straight_pages=False,
        pretrained=True,
        pretrained_backbone=True,
        export_as_straight_boxes=True,
        preserve_aspect_ratio=True,
    )


def convert_coordinates(geometry, page_dim, i, j):
    """Convert a word box of one tile to pixel bounds in the full image.

    Args:
        geometry: ``((x_min, y_min), (x_max, y_max))``; presumably relative
            (0-1) coordinates as exported by docTR -- confirm against the
            docTR export format.
        page_dim: ``(height, width)`` of the tile.
        i: Column index of the tile (horizontal offset in tile widths).
        j: Row index of the tile (vertical offset in tile heights).

    Returns:
        ``[x_min, x_max, y_min, y_max]``; minima are rounded down and maxima
        rounded up before the tile offset is added.
    """
    len_x = page_dim[1]
    len_y = page_dim[0]
    (x_min, y_min), (x_max, y_max) = geometry[0], geometry[1]
    return [
        math.floor(x_min * len_x) + i * len_x,
        math.ceil(x_max * len_x) + i * len_x,
        math.floor(y_min * len_y) + j * len_y,
        math.ceil(y_max * len_y) + j * len_y,
    ]


def get_coordinates(output, x, y):
    """Flatten the first page of an exported OCR result into text records.

    Args:
        output: Exported OCR result (a dict with a ``pages`` list).
        x: Column index of the tile the result belongs to.
        y: Row index of the tile the result belongs to.

    Returns:
        One ``"[x_min, x_max, y_min, y_max]: value"`` string per word.
    """
    page = output['pages'][0]
    page_dim = page["dimensions"]
    return [
        "{}: {}".format(
            convert_coordinates(word["geometry"], page_dim, x, y),
            word["value"],
        )
        for block in page["blocks"]
        for line in block["lines"]
        for word in line["words"]
    ]


def get_vals(file_path, wh, rows=None):
    """Run OCR over every tile in ``file_path`` and collect the word records.

    Tiles are expected to be named ``1.jpg`` .. ``N.jpg`` and numbered column
    by column (all rows of the first column, then the second column, ...),
    which is the order in which ``process`` writes them.

    Args:
        file_path: Directory containing the numbered tiles.
        wh: Number of tile columns.
        rows: Number of tile rows.  Defaults to ``wh`` (a square grid), which
            keeps the original two-argument call working.

    Returns:
        The concatenated records of ``get_coordinates`` for all tiles.
    """
    if rows is None:
        rows = wh
    model = load_model()
    records = []
    counter = 1
    for col in range(wh):
        for row in range(rows):
            tile_doc = DocumentFile.from_images(f"{file_path}/{counter}.jpg")
            output = model(tile_doc).export()
            records.extend(get_coordinates(output, col, row))
            counter += 1
    return records


def clean_dir(path):
    """Make sure ``path`` exists and remove the ``.jpg`` files inside it.

    The files actually present are removed, rather than assuming they are
    named ``1.jpg`` .. ``n.jpg``; a missing directory or a stray file no
    longer raises ``FileNotFoundError``.  Non-JPEG entries are left alone.
    """
    os.makedirs(path, exist_ok=True)
    for name in os.listdir(path):
        full_path = os.path.join(path, name)
        if name.lower().endswith(".jpg") and os.path.isfile(full_path):
            os.remove(full_path)


def html_path(img, counter):
    """Save a word crop as ``<sub_img_temp>/<counter>.jpg``.

    Returns:
        An empty string.
    """
    img.save(f"{sub_img_temp}/{counter}.jpg")
    # NOTE(review): an empty string is returned, so the "Image" column of the
    # result table stays blank.  This looks like markup pointing at the saved
    # file was lost -- confirm the intended markdown/HTML and restore it.
    return ""


def create_box(l):
    """Reorder ``[x_min, x_max, y_min, y_max]`` into a PIL crop box.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` -- i.e. (left, upper, right, lower).
    """
    x_min, x_max, y_min, y_max = l[0], l[1], l[2], l[3]
    return (x_min, y_min, x_max, y_max)


def process(filepath, regex, size=(1656, 1170)):
    """OCR an image tile by tile and return the words matching ``regex``.

    Args:
        filepath: Path of the image to analyse.
        regex: ``'Regex-1'`` keeps values that start with a number (optionally
            with a ``.``/``,`` decimal part); any other value keeps values
            containing at least one digit.
        size: ``(width, height)`` of the tiles in pixels.

    Returns:
        A DataFrame with the columns ``Coordinates``
        (``[x_min, x_max, y_min, y_max]`` in full-image pixels), ``Image``
        and ``Value``, restricted to the rows whose value matches.
    """
    clean_dir(path=img_temp)
    clean_dir(path=sub_img_temp)

    img = Image.open(filepath)
    if img.mode not in ("RGB", "L"):
        # JPEG cannot store alpha or palette images; convert so that saving
        # tiles and crops does not fail for e.g. RGBA PNG uploads.
        img = img.convert("RGB")
    width, height = img.size

    # Tile origins.  Tiles on the right/bottom edge extend past the image
    # (the crop box is not clamped), so every tile has the full ``size``.
    x_starts = range(0, width, size[0])
    y_starts = range(0, height, size[1])
    counter = 0
    for left in x_starts:
        for top in y_starts:
            counter += 1
            box = (left, top, left + size[0], top + size[1])
            img.crop(box).save(f"{img_temp}/{counter}.jpg")

    pattern = _NUMBER_PATTERN if regex == 'Regex-1' else _DIGITS_PATTERN

    # Pass the real grid shape: deriving it from the square root of the tile
    # count is only correct when the grid happens to be square.
    data = get_vals(img_temp, wh=len(x_starts), rows=len(y_starts))

    dimensions, im_, values = [], [], []
    for counter, record in enumerate(data, start=1):
        # Split on the first colon only, so recognised text that itself
        # contains ':' (e.g. "12:30") is kept whole.
        coords_text, value = record.split(':', 1)
        coords = ast.literal_eval(coords_text)
        dimensions.append(coords)
        im_.append(html_path(img.crop(create_box(coords)), counter=counter))
        values.append(value)

    metadata = pd.DataFrame(
        list(zip(dimensions, im_, values)),
        columns=['Coordinates', 'Image', 'Value'],
    )
    # Keep only the rows whose recognised value matches the chosen pattern.
    return metadata[metadata['Value'].str.contains(pattern)]


def main():
    """Build the Gradio interface around ``process`` and launch it."""
    demo = gr.Interface(
        fn=process,
        inputs=[
            gr.Image(type="filepath", interactive=True),
            gr.Dropdown(['Regex-1']),
        ],
        outputs=gr.DataFrame(
            wrap=True,
            datatype=["str", "markdown", "str"],
            interactive=True,
        ),
        title="OCR",
        description="Issue with filesystem...not able to parse all files in the folders",
    )
    demo.launch(debug=True, show_error=True)


if __name__ == "__main__":
    main()