import cv2
import numpy as np
from openvino.runtime import Core
import gradio as gr
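
# NOTE: `openvino.runtime.Core` is the OpenVINO 2022.x import path; newer
# releases should also expose the same class as `from openvino import Core`.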

#####
# Load the pretrained model
#####
ie = Core()
model = ie.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output("boxes")
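
# For reference: the model takes one BGR image in NCHW layout and returns a
# "boxes" tensor whose rows are [x_min, y_min, x_max, y_max, conf]; unused rows
# are zero-padded, which is why predict() filters out all-zero rows below.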

#####
# Inference
#####
def predict(img: np.ndarray, threshold: float) -> str:
    # Input: a NumPy array of the image in RGB, Gradio's default for gr.Image
    # (see https://www.gradio.app/docs/#image).

    # The text detection model expects an image in BGR format.
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    # N,C,H,W = batch size, number of channels, height, width.
    N, C, H, W = input_layer_ir.shape
    # Resize the image to meet network expected input sizes.
    resized_image = cv2.resize(image, (W, H))
    # Reshape to the network input shape (HWC -> NCHW, adding a batch dimension).
    input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)

    # Run inference and fetch the "boxes" output.
    boxes = compiled_model([input_image])[output_layer_ir]
    # Remove all-zero rows (padding entries with no detection).
    boxes = boxes[~np.all(boxes == 0, axis=1)]
    print(f'detected {len(boxes)} candidate text boxes')
    result = convert_result_to_image(image, resized_image, boxes, threshold=threshold, conf_labels=False)

    # convert_result_to_image returns an RGB array; cv2.imwrite expects BGR.
    result_fp = 'temp_result.jpg'
    cv2.imwrite(result_fp, cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    return result_fp
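
# Quick local smoke test (a sketch, left commented out so it does not run on
# import): bypasses the Gradio UI, assuming a local image file 'test.jpg',
# the same file listed under `examples` below.
# img = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)  # mimic Gradio's RGB input
# print(predict(img, threshold=0.3))  # prints the path of the annotated image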


# Each detection is described in the [x_min, y_min, x_max, y_max, conf] format.
# The image passed here is in BGR format with a changed width and height. To display
# it in the colors expected by matplotlib, use the cvtColor function.
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    # Define colors for boxes and descriptions.
    colors = {"red": (255, 0, 0), "green": (0, 255, 0)}

    # Fetch the image shapes to calculate a ratio.
    (real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]
    ratio_x, ratio_y = real_x / resized_x, real_y / resized_y

    # Convert the base image from BGR to RGB format.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Iterate through non-zero boxes.
    for box in boxes:
        # Pick a confidence factor from the last place in an array.
        conf = box[-1]
        if conf > threshold:
            # Convert floats to ints and multiply each box corner by the x and y ratios.
            # If the bounding box is found at the top of the image,
            # position its upper edge a little lower so that it stays visible.
            (x_min, y_min, x_max, y_max) = [
                int(max(corner_position * ratio_y, 10)) if idx % 2 
                else int(corner_position * ratio_x)
                for idx, corner_position in enumerate(box[:-1])
            ]

            # Draw a box at the computed position. The rectangle parameters are:
            # image, start_point, end_point, color, thickness.
            rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors["green"], 3)

            # Add the confidence value next to the box.
            # The putText parameters are: image, text, bottom-left corner of the text,
            # font, font_scale, color, thickness, line_type.
            if conf_labels:
                rgb_image = cv2.putText(
                    rgb_image,
                    f"{conf:.2f}",
                    (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.8,
                    colors["red"],
                    1,
                    cv2.LINE_AA,
                )

    return rgb_image

#####
# Gradio setup
#####

title = "Text Detection"
description = "Text detection with an OpenVINO model"
examples = ['test.jpg']
interpretation = 'default'
enable_queue = True

gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(),
        gr.Slider(minimum=0, maximum=1, value=0.3, label="Confidence threshold")
    ],
    outputs=gr.Image(type='filepath'),
    title=title,
    description=description,
    #examples=examples,
    interpretation=interpretation,
    enable_queue=enable_queue
    ).launch()
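
# Note: launch() also accepts server options, e.g. .launch(share=True) for a
# temporary public link, or .launch(server_name="0.0.0.0") to listen on all
# network interfaces.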