import cv2
import numpy as np
from openvino.runtime import Core
import gradio as gr
#####
# Load pretrained model
#####
ie = Core()
model = ie.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output("boxes")
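# Startup sanity check (a sketch, not part of the original app): per the model
# card, horizontal-text-detection-0001 takes a [1, 3, 704, 704] (N, C, H, W)
# image and its "boxes" output holds rows of [x_min, y_min, x_max, y_max, conf].
print(f"model input shape: {input_layer_ir.shape}")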
#####
# Inference
#####
def predict(img: np.ndarray, threshold: float) -> str:
    # Input: numpy array of the image in RGB (Gradio's default, see https://www.gradio.app/docs/#image).
    # The text detection model expects an image in BGR format.
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    # N, C, H, W = batch size, number of channels, height, width.
    N, C, H, W = input_layer_ir.shape
    # Resize the image to the network's expected input size.
    resized_image = cv2.resize(image, (W, H))
    # Reshape to the network input shape (HWC -> NCHW).
    input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)
    # Run inference and fetch the "boxes" output.
    boxes = compiled_model([input_image])[output_layer_ir]
    # Remove zero-only boxes.
    boxes = boxes[~np.all(boxes == 0, axis=1)]
    print(f"detected {len(boxes)} boxes")
    result = convert_result_to_image(image, resized_image, boxes, threshold=threshold, conf_labels=False)
    # `result` is an RGB numpy array (previewable with matplotlib if needed);
    # convert it back to BGR before handing it to cv2.imwrite.
    result_fp = "temp_result.jpg"
    cv2.imwrite(result_fp, cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
    return result_fp
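# Local smoke test (a sketch; assumes a "test.jpg" next to this script, matching
# the `examples` list below; left commented out so it does not run on the Space):
# test_img = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)
# print(predict(test_img, threshold=0.3))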
# Each detection comes in the [x_min, y_min, x_max, y_max, conf] format.
# The image passed here is in BGR format with changed width and height. To display
# it in the colors expected by matplotlib, use the cvtColor function.
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    # Define colors for boxes and descriptions.
    colors = {"red": (255, 0, 0), "green": (0, 255, 0)}
    # Fetch the image shapes to calculate a ratio.
    (real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]
    ratio_x, ratio_y = real_x / resized_x, real_y / resized_y
    # Convert the base image from BGR to RGB format.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    # Iterate through non-zero boxes.
    for box in boxes:
        # Pick the confidence factor from the last place in the array.
        conf = box[-1]
        if conf > threshold:
            # Convert floats to ints and multiply each corner position by the
            # x or y ratio. If a bounding box sits at the very top of the image,
            # position its upper edge a little lower so it stays visible.
            (x_min, y_min, x_max, y_max) = [
                int(max(corner_position * ratio_y, 10)) if idx % 2
                else int(corner_position * ratio_x)
                for idx, corner_position in enumerate(box[:-1])
            ]
            # Draw a box. The rectangle parameters are: image, start_point,
            # end_point, color, thickness.
            rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors["green"], 3)
            # Add the confidence to the image. The putText parameters are: image,
            # text, bottom-left corner of the text, font, font_scale, color,
            # thickness, line_type.
            if conf_labels:
                rgb_image = cv2.putText(
                    rgb_image,
                    f"{conf:.2f}",
                    (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.8,
                    colors["red"],
                    1,
                    cv2.LINE_AA,
                )
    return rgb_image
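# Worked example of the scaling above (hypothetical numbers): a raw box
# [100, 50, 300, 200, 0.9] predicted on the 704x704 network input, displayed on a
# 1408x1408 original, gives ratio_x = ratio_y = 2.0, so the rectangle is drawn
# from (200, 100) to (600, 400) and the optional label reads "0.90".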
#####
# Gradio Setup
#####
title = "Text Detection"
description = "Text Detection with an OpenVINO model"
examples = ["test.jpg"]

gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(),
        gr.Slider(minimum=0, maximum=1, value=0.3, label="Confidence threshold"),
    ],
    outputs=gr.Image(type="filepath"),
    title=title,
    description=description,
    # examples=examples,
).queue().launch()
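# Note: if the commented-out `examples` kwarg is re-enabled, Gradio expects one
# entry per input component, e.g. (a sketch): examples=[["test.jpg", 0.3]]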