"""Gradio demo that detects horizontal text in an image with an OpenVINO model."""

import tempfile

import cv2
import matplotlib.pyplot as plt
import numpy as np
from openvino.runtime import Core
import gradio as gr

#####
# Load pretrained model
#####
ie = Core()
model = ie.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = compiled_model.input(0)
output_layer_ir = compiled_model.output("boxes")


#####
# Inference
#####
def predict(img: np.ndarray, threshold: float) -> str:
    """Run text detection on ``img`` and return the path of the annotated image.

    Args:
        img: Input image as a numpy array in RGB channel order (the default
            delivered by the Gradio image input, see
            https://www.gradio.app/docs/#image).
        threshold: Minimum confidence a detection must exceed to be drawn.

    Returns:
        Path to a JPEG file containing the input image with the detected
        text regions outlined.

    Raises:
        ValueError: If no image was supplied.
        OSError: If the annotated image could not be written to disk.
    """
    if img is None:
        raise ValueError("No input image was provided.")

    # Text detection models expect an image in BGR format.
    image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Input layout is N,C,H,W = batch size, number of channels, height, width;
    # only the spatial dimensions are needed here.
    _, _, height, width = input_layer_ir.shape

    # Resize the image to meet network expected input sizes.
    resized_image = cv2.resize(image, (width, height))

    # Reshape to the network input shape (HWC -> CHW, then add the batch axis).
    input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)

    # Run inference and pick the "boxes" output.
    boxes = compiled_model([input_image])[output_layer_ir]

    # Remove zero only boxes.
    boxes = boxes[~np.all(boxes == 0, axis=1)]
    print(f'detected {len(boxes)} things')

    result = convert_result_to_image(
        image, resized_image, boxes, threshold=threshold, conf_labels=False
    )

    # Use a unique file per call so that overlapping requests (or several
    # processes sharing a working directory) never overwrite each other's
    # result. The file is intentionally left on disk: the caller reads it
    # after this function has returned.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        result_fp = tmp.name

    # ``result`` is RGB while cv2.imwrite expects BGR, so swap the channels.
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(result_fp, cv2.cvtColor(result, cv2.COLOR_RGB2BGR)):
        raise OSError(f"Could not write result image to {result_fp}")
    return result_fp


def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    """Draw the detected boxes on a copy of ``bgr_image`` and return it as RGB.

    Args:
        bgr_image: Original image in BGR channel order.
        resized_image: The image as it was fed to the network; only its
            height and width are used, to scale boxes back to the original.
        boxes: Iterable of detections, each in the
            ``[x_min, y_min, x_max, y_max, conf]`` format, with coordinates
            expressed in the resized image's pixel space.
        threshold: Only detections with ``conf > threshold`` are drawn.
        conf_labels: When true, the confidence is written above each box.

    Returns:
        An RGB image (same size as ``bgr_image``) with the boxes drawn on it.
    """
    # Define colors for boxes and descriptions (RGB order, matching rgb_image).
    colors = {"red": (255, 0, 0), "green": (0, 255, 0)}

    # Fetch the image shapes to calculate a ratio.
    (real_y, real_x), (resized_y, resized_x) = bgr_image.shape[:2], resized_image.shape[:2]
    ratio_x, ratio_y = real_x / resized_x, real_y / resized_y

    # Convert the base image from BGR to RGB format.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    # Iterate through non-zero boxes.
    for box in boxes:
        # Pick a confidence factor from the last place in an array.
        conf = box[-1]
        if conf > threshold:
            # Convert float to int and multiply corner position of each box by x and y ratio.
            # Odd indices are y coordinates, even indices are x coordinates.
            # If the bounding box is found at the top of the image,
            # position the upper box bar little lower to make it visible on the image.
            (x_min, y_min, x_max, y_max) = [
                int(max(corner_position * ratio_y, 10)) if idx % 2
                else int(corner_position * ratio_x)
                for idx, corner_position in enumerate(box[:-1])
            ]

            # Draw a box based on the position, parameters in rectangle function are:
            # image, start_point, end_point, color, thickness.
            rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), colors["green"], 3)

            # Add text to the image based on position and confidence.
            # Parameters in text function are: image, text, bottom-left_corner_textfield,
            # font, font_scale, color, thickness, line_type.
            if conf_labels:
                rgb_image = cv2.putText(
                    rgb_image,
                    f"{conf:.2f}",
                    (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.8,
                    colors["red"],
                    1,
                    cv2.LINE_AA,
                )

    return rgb_image


#####
# Gradio Setup
#####
title = "Text Detection"
description = "Text Detection with OpenVino model"
examples = ['test.jpg']
interpretation = 'default'
enable_queue = True

# NOTE(review): this mixes gr.inputs/gr.outputs with the top-level gr.Slider
# component; presumably the former is a legacy namespace - confirm against the
# installed Gradio version before upgrading.
gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Image(),
        gr.Slider(minimum=0, maximum=1, value=.3),
    ],
    outputs=gr.outputs.Image(type='filepath'),
    title=title,
    description=description,
    # examples=examples,
    interpretation=interpretation,
    enable_queue=enable_queue,
).launch()