import os
import math
import re
import ast
import gradio as gr
import numpy as np
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from PIL import Image, ImageDraw
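# Gradio demo: OCR on large images via docTR.
# The uploaded page is cut into fixed-size tiles, each tile is OCR'd with a
# docTR predictor, word boxes are mapped back to full-page pixel coordinates,
# and words matching the chosen regex are returned as a DataFrame of
# (coordinates, word-crop image, value) rows.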
img_temp = "tp"        # working directory for page tiles
sub_img_temp = "tp1"   # working directory for per-word crops
os.makedirs(img_temp, exist_ok=True)
os.makedirs(sub_img_temp, exist_ok=True)
def load_model():
    # docTR OCR pipeline: rotated-text detection + CRNN recognition.
    return ocr_predictor(
        det_arch='linknet_resnet18_rotation',
        reco_arch='crnn_vgg16_bn',
        detect_orientation=True,
        assume_straight_pages=False,
        pretrained=True,
        pretrained_backbone=True,
        export_as_straight_boxes=True,
        preserve_aspect_ratio=True,
    )
def convert_coordinates(geometry, page_dim, i, j):
    # docTR geometries are relative (0..1); page_dim is (height, width) of the tile.
    # i and j are the tile's grid indices, used to shift the box back into
    # full-page pixel coordinates.
    len_x = page_dim[1]
    len_y = page_dim[0]
    (x_min, y_min) = geometry[0]
    (x_max, y_max) = geometry[1]
    x_min = math.floor(x_min * len_x) + i * len_x
    x_max = math.ceil(x_max * len_x) + i * len_x
    y_min = math.floor(y_min * len_y) + j * len_y
    y_max = math.ceil(y_max * len_y) + j * len_y
    return [x_min, x_max, y_min, y_max]
def get_coordinates(output, x, y):
    # Walk the exported docTR hierarchy (blocks -> lines -> words) and collect
    # "[x_min, x_max, y_min, y_max]: value" strings in full-page coordinates.
    page_dim = output['pages'][0]["dimensions"]
    raw_data = []
    for obj1 in output['pages'][0]["blocks"]:
        for obj2 in obj1["lines"]:
            for obj3 in obj2["words"]:
                converted_coordinates = convert_coordinates(obj3["geometry"], page_dim, x, y)
                raw_data.append("{}: {}".format(converted_coordinates, obj3["value"]))
    return raw_data
def get_vals(file_path, wh):
    # Run OCR on each tile (saved as 1.jpg, 2.jpg, ...) of the wh x wh grid and
    # gather word boxes/values in full-page coordinates.
    model = load_model()
    Data, counter = [], 1
    for i in range(wh):
        for j in range(wh):
            path = f"{file_path}/{counter}.jpg"
            temp_doc = DocumentFile.from_images(path)
            output = model(temp_doc).export()
            data = get_coordinates(output, i, j)
            counter += 1
            Data.extend(data)
    return Data
def clean_dir(path):
    # Remove every file left over from the previous run.
    for name in os.listdir(path=path):
        os.remove(f"{path}/{name}")
def html_path(img, counter):
    # Save the word crop and return an <img> tag that Gradio can render in a
    # markdown DataFrame cell (local files are served under the /file= route).
    img.save(f"{sub_img_temp}/{counter}.jpg")
    return f"<img src='/file={sub_img_temp}/{counter}.jpg'></img>"
def create_box(l):
    # l is [x_min, x_max, y_min, y_max]; PIL's crop expects (left, upper, right, lower).
    return (l[0], l[2], l[1], l[3])
def process(filepath, regex, size=(1656, 1170)):
    clean_dir(path=img_temp)
    clean_dir(path=sub_img_temp)
    img = Image.open(filepath)
    width, height = img.size
    parts, dimensions, im_, values = [], [], [], []
    counter = 0
    # Split the page into size[0] x size[1] tiles, saved as 1.jpg, 2.jpg, ...
    for i in range(0, width, size[0]):
        for j in range(0, height, size[1]):
            counter += 1
            box = (i, j, i + size[0], j + size[1])
            img.crop(box).save(f"{img_temp}/{counter}.jpg")
            parts.append(img.crop(box))
    temp = os.listdir(path=img_temp)  # tile files written above
    if regex == 'Regex-1':
        pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
    else:
        pattern = re.compile(r"\d+")
    # OCR every tile; the tile grid is assumed to be square, hence the sqrt.
    data = get_vals(img_temp, wh=math.floor(math.sqrt(len(temp))))
    counter = 1
    for d in data:
        coords, value = d.split(':', 1)  # "[x_min, x_max, y_min, y_max]: word"
        bounds = ast.literal_eval(coords)
        dimensions.append(bounds)
        im_.append(html_path(img.crop(create_box(bounds)), counter=counter))
        values.append(value)
        counter += 1
    metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates', 'Image', 'Value'])
    # Keep only the rows whose OCR'd value matches the selected regex.
    df = metadata[metadata['Value'].str.contains(pattern)]
    return df
def main():
    demo = gr.Interface(
        fn=process,
        inputs=[gr.Image(type="filepath", interactive=True), gr.Dropdown(['Regex-1'])],
        outputs=gr.DataFrame(wrap=True, datatype=["str", "markdown", "str"], interactive=True),
        title="OCR",
        description="Issue with filesystem...not able to parse all files in the folders",
    )
    demo.launch(debug=True, show_error=True)


if __name__ == "__main__":
    main()