File size: 4,064 Bytes
fc9d076
 
 
 
 
 
 
 
 
 
 
8818cf9
 
fc9d076
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a049953
 
 
fc9d076
 
 
 
 
 
 
 
 
a049953
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc9d076
a049953
 
 
 
 
 
 
 
 
fc9d076
a049953
fc9d076
 
 
 
 
 
a049953
 
 
 
fc9d076
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import math
import re
import ast
import gradio as gr
import numpy as np
import pandas as pd
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from PIL import Image, ImageDraw

# Scratch directory where process() saves the numbered page tiles that get_vals() reads.
img_temp = "tp"
# Scratch directory where html_path() saves the per-word crops shown in the result table.
sub_img_temp = "tp1"

def load_model():
    """Build the docTR OCR predictor used to read the image tiles.

    Returns:
        The object returned by ``ocr_predictor`` for a
        ``linknet_resnet18_rotation`` detector and a ``crnn_vgg16_bn``
        recogniser, with pretrained weights requested and boxes exported as
        straight boxes.
    """
    settings = {
        "det_arch": 'linknet_resnet18_rotation',
        "reco_arch": 'crnn_vgg16_bn',
        "detect_orientation": True,
        "assume_straight_pages": False,
        "pretrained": True,
        "pretrained_backbone": True,
        "export_as_straight_boxes": True,
        "preserve_aspect_ratio": True,
    }
    return ocr_predictor(**settings)

def convert_coordinates(geometry, page_dim, i, j):
    """Scale a relative word box to pixels and shift it by a tile offset.

    Args:
        geometry: Two ``(x, y)`` pairs, the minimum corner first and the
            maximum corner second; each value is multiplied by the page size.
        page_dim: Page size, with the y extent at index 0 and the x extent at
            index 1.
        i: Number of page widths to add to the x coordinates.
        j: Number of page heights to add to the y coordinates.

    Returns:
        ``[x_min, x_max, y_min, y_max]``; minima are floored and maxima are
        ceiled before the offset is added.
    """
    height, width = page_dim[0], page_dim[1]
    (left, top), (right, bottom) = geometry[0], geometry[1]
    shift_x = i * width
    shift_y = j * height
    return [
        math.floor(left * width) + shift_x,
        math.ceil(right * width) + shift_x,
        math.floor(top * height) + shift_y,
        math.ceil(bottom * height) + shift_y,
    ]

def get_coordinates(output, x, y):
    """Flatten the first page of an exported OCR result into text records.

    Args:
        output: Exported result; only ``output['pages'][0]`` is read, and it
            must provide ``"dimensions"`` and nested ``"blocks"`` ->
            ``"lines"`` -> ``"words"``.
        x: Horizontal tile index forwarded to ``convert_coordinates``.
        y: Vertical tile index forwarded to ``convert_coordinates``.

    Returns:
        One ``"<coordinates>: <value>"`` string per word, in document order.
    """
    page = output['pages'][0]
    page_dim = page["dimensions"]
    return [
        "{}: {}".format(
            convert_coordinates(word["geometry"], page_dim, x, y),
            word["value"],
        )
        for block in page["blocks"]
        for line in block["lines"]
        for word in line["words"]
    ]

def get_vals(file_path, wh):
    """Run OCR on the numbered tiles in a directory and collect the word records.

    Tiles are read as ``<file_path>/<n>.jpg`` with ``n`` counting up from 1.
    The first grid index (outer loop) is passed to ``get_coordinates`` as the
    x index and the second as the y index.

    NOTE: exactly ``wh * wh`` tiles are read, so the tile grid is assumed to
    be square.

    Args:
        file_path: Directory that contains the numbered tile images.
        wh: Number of tiles along each side of the grid.

    Returns:
        The concatenated records returned by ``get_coordinates`` for all tiles.
    """
    predictor = load_model()
    grid = [(col, row) for col in range(wh) for row in range(wh)]
    records = []
    for number, (col, row) in enumerate(grid, start=1):
        tile = DocumentFile.from_images(f"{file_path}/{number}.jpg")
        exported = predictor(tile).export()
        records += get_coordinates(exported, col, row)
    return records

def clean_dir(path):
    """Make sure ``path`` exists and contains no files.

    The directory is created when it is missing, so a first run does not
    fail. Every file in it is removed regardless of its name; the previous
    implementation assumed the files were named ``1.jpg`` .. ``n.jpg`` and
    raised ``FileNotFoundError`` on anything else. Sub-directories are left
    untouched.

    Args:
        path: Directory to create and/or empty.
    """
    os.makedirs(path, exist_ok=True)
    # Snapshot the listing first so removals cannot disturb the iteration.
    for name in os.listdir(path):
        target = os.path.join(path, name)
        if os.path.isdir(target) and not os.path.islink(target):
            continue
        os.remove(target)
      
def html_path(img, counter):
    """Save a crop as ``<sub_img_temp>/<counter>.jpg`` and return an ``<img>`` tag for it.

    Args:
        img: Image object exposing ``save(path)``.
        counter: Number used as the file name.

    Returns:
        An HTML ``<img>`` element whose ``src`` is ``/file=`` followed by the
        saved file's path.
    """
    saved_as = f"{sub_img_temp}/{counter}.jpg"
    img.save(saved_as)
    return f"<img src='/file={saved_as}'></img>"

def create_box(l):
    """Reorder ``[x_min, x_max, y_min, y_max]`` into ``(x_min, y_min, x_max, y_max)``.

    Args:
        l: Indexable holding the box bounds in the order above.

    Returns:
        A 4-tuple with the two middle entries swapped.
    """
    x_min, x_max, y_min, y_max = l[0], l[1], l[2], l[3]
    return (x_min, y_min, x_max, y_max)

def process(filepath, regex, size=(1656, 1170)):
    """Tile an image, OCR every tile and return the words matching a pattern.

    Both scratch directories are emptied with ``clean_dir``. The image is then
    cut into ``size`` tiles saved as ``<img_temp>/<n>.jpg``, the tiles are read
    with ``get_vals``, and every recognised word is cropped from the full image
    for display.

    Args:
        filepath: Path of the image to analyse.
        regex: Pattern selector. ``'Regex-1'`` keeps values that start with a
            whitespace character followed by a number (optionally with a
            ``.`` or ``,`` fraction); anything else keeps values that contain
            a digit.
        size: ``(width, height)`` of one tile in pixels.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Coordinates``, ``Image``
        (an HTML ``<img>`` tag for the word crop) and ``Value``, restricted to
        the rows whose value matches the selected pattern.
    """
    clean_dir(path=img_temp)
    clean_dir(path=sub_img_temp)

    # JPEG cannot store alpha or palette modes, so normalise to RGB up front;
    # the context manager also closes the source file handle.
    with Image.open(filepath) as source:
        img = source.convert("RGB")
    width, height = img.size
    tile_w, tile_h = size

    # get_vals walks a square wh x wh grid (x index outer, y index inner).
    # Pad the tiling to a square so that tile numbers and grid positions stay
    # aligned for non-square layouts too; crops outside the image are blank.
    grid = max(math.ceil(width / tile_w), math.ceil(height / tile_h))
    counter = 0
    for col in range(grid):
        for row in range(grid):
            counter += 1
            left, top = col * tile_w, row * tile_h
            tile = img.crop((left, top, left + tile_w, top + tile_h))
            tile.save(f"{img_temp}/{counter}.jpg")

    if regex == 'Regex-1':
        pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
    else:
        pattern = re.compile(r"\d+")

    data = get_vals(img_temp, wh=grid)
    rows = []
    for counter, record in enumerate(data, start=1):
        # Split on the first colon only: the value itself may contain colons.
        # The space after the separator stays in the value because 'Regex-1'
        # expects leading whitespace.
        coords_text, _, value = record.partition(':')
        coords = ast.literal_eval(coords_text)
        crop = img.crop(create_box(coords))
        rows.append((coords, html_path(crop, counter=counter), value))

    metadata = pd.DataFrame(rows, columns=['Coordinates', 'Image', 'Value'])
    return metadata[metadata['Value'].str.contains(pattern)]

def main():
    """Build the Gradio interface around ``process`` and launch it.

    The interface takes an image (handed to ``process`` as a file path) and a
    pattern choice, and shows the resulting table with the middle column
    rendered as markdown.
    """
    image_input = gr.Image(type="filepath", interactive=True)
    pattern_choice = gr.Dropdown(['Regex-1'])
    result_table = gr.DataFrame(
        wrap=True,
        datatype=["str", "markdown", "str"],
        interactive=True,
    )
    demo = gr.Interface(
        fn=process,
        inputs=[image_input, pattern_choice],
        outputs=result_table,
        title="OCR",
        description="Issue with filesystem...not able to parse all files in the folders",
    )
    demo.launch(debug=True, show_error=True)

# Launch the Gradio demo only when this file is executed as a script.
if __name__=="__main__":
    main()