import cv2
import numpy as np
from openvino.runtime import Core
import gradio as gr
#####
# Load pretrained model
#####
ie = Core()
model = ie.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output("boxes")
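# For reference (shapes as documented in the Open Model Zoo entry for this model;
# verify on your build with the prints below): the input blob is [1, 3, 704, 704]
# in BGR, and each row of the "boxes" output is [x_min, y_min, x_max, y_max, conf].
# print(input_layer_ir.shape)
# print(output_layer_ir.shape)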
#####
# Inference
#####
def predict(img: np.ndarray, threshold: float) -> str:
    # Input: numpy array of the image in RGB (the Gradio Image default, see https://www.gradio.app/docs/#image).
    # The text detection model expects an image in BGR format.
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # N, C, H, W = batch size, number of channels, height, width.
    N, C, H, W = input_layer_ir.shape

    # Resize the image to the network's expected input size.
    resized_image = cv2.resize(image, (W, H))

    # Reshape HWC to NCHW, the network input layout.
    input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)

    # Run inference and fetch the "boxes" output.
    boxes = compiled_model([input_image])[output_layer_ir]

    # Remove zero-only (padding) boxes.
    boxes = boxes[~np.all(boxes == 0, axis=1)]
    print(f'detected {len(boxes)} boxes')

    result = convert_result_to_image(image, resized_image, boxes, threshold=threshold, conf_labels=False)

    # convert_result_to_image returns RGB, but cv2.imwrite expects BGR, so swap channels before saving.
    result_fp = 'temp_result.jpg'
    cv2.imwrite(result_fp, cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    return result_fp
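# Quick sanity check for predict() outside of Gradio (assumes a local test.jpg,
# the same file this Space lists under examples):
# rgb = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
# print(predict(rgb, threshold=0.3))  # -> 'temp_result.jpg'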
# Each detection is in [x_min, y_min, x_max, y_max, conf] format.
# The image passed here is BGR at the network's width and height; to display it
# in the colors matplotlib and Gradio expect, convert it back to RGB.
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    # Define colors for boxes and descriptions.
    colors = {"red": (255, 0, 0), "green": (0, 255, 0)}

    # Fetch the image shapes to calculate the scaling ratios.
    (real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]
    ratio_x, ratio_y = real_x / resized_x, real_y / resized_y

    # Convert the base image from BGR to RGB format.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Iterate through the non-zero boxes.
    for box in boxes:
        # The confidence factor is the last element of each box.
        conf = box[-1]
        if conf > threshold:
            # Convert floats to ints and scale each corner by the x and y ratios.
            # If a bounding box sits at the very top of the image, push its upper
            # edge down slightly (to at least 10 px) so the drawn line stays visible.
            (x_min, y_min, x_max, y_max) = [
                int(max(corner_position * ratio_y, 10)) if idx % 2
                else int(corner_position * ratio_x)
                for idx, corner_position in enumerate(box[:-1])
            ]

            # Draw the box. cv2.rectangle arguments: image, start_point, end_point, color, thickness.
            rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors["green"], 3)

            # Optionally label the box with its confidence. cv2.putText arguments:
            # image, text, bottom-left corner of the text, font, font_scale, color, thickness, line_type.
            if conf_labels:
                rgb_image = cv2.putText(
                    rgb_image,
                    f"{conf:.2f}",
                    (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.8,
                    colors["red"],
                    1,
                    cv2.LINE_AA,
                )

    return rgb_image
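# Worked example of the corner scaling above (hypothetical sizes): with a 704x704
# network input and a 1408x1056 original photo, ratio_x = 2.0 and ratio_y = 1.5,
# so a raw corner (100, 50) maps to (200, 75) on the original image.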
#####
# Gradio Setup
#####
title = "Text Detection"
description = "Text detection with an OpenVINO model"
examples = ['test.jpg']
# Build the demo: an image and a confidence-threshold slider in, an annotated image out.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(label="Input image"),
        gr.Slider(minimum=0, maximum=1, value=0.3, label="Confidence threshold"),
    ],
    outputs=gr.Image(type='filepath', label="Detections"),
    title=title,
    description=description,
    #examples=examples,
)
demo.queue().launch()