|
import math
import os

import cv2
import gradio as gr
import numpy as np
import onnxruntime as rt
import torch
from matplotlib.colors import hsv_to_rgb
from onnx import numpy_helper
from PIL import Image
from torchvision import transforms, models
|
|
|
|
|
def preprocess(image):
    """Turn a PIL image into the float32 channels-first array fed to the model.

    The image is forced to RGB, resized so its shorter side is 800 pixels,
    reordered to BGR, transposed to (channels, height, width), offset by a
    fixed per-channel constant, and zero-padded on the bottom/right so that
    height and width are multiples of 32.

    Args:
        image: A PIL image in any mode.

    Returns:
        A ``float32`` NumPy array of shape ``(3, padded_h, padded_w)``.
    """
    # Grayscale / palette images yield a 2-D array and CMYK / LA images have
    # the wrong channel layout, so the BGR reordering below would crash or
    # pick the wrong channels. Normalising the mode up front avoids both.
    image = image.convert("RGB")

    # Scale so that the shorter side becomes exactly 800 pixels.
    ratio = 800.0 / min(image.size[0], image.size[1])
    new_size = (int(ratio * image.size[0]), int(ratio * image.size[1]))
    image = image.resize(new_size, Image.BILINEAR)

    # HWC RGB -> HWC BGR -> CHW.
    image = np.array(image)[:, :, [2, 1, 0]].astype('float32')
    image = np.transpose(image, [2, 0, 1])

    # Per-channel offsets, listed in the same (BGR) order as the channels.
    mean_vec = np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)
    image = image - mean_vec[:, None, None]

    # Round both spatial dimensions up to the next multiple of 32 and place
    # the image in the top-left corner of a zero-filled canvas.
    padded_h = int(math.ceil(image.shape[1] / 32) * 32)
    padded_w = int(math.ceil(image.shape[2] / 32) * 32)
    padded_image = np.zeros((3, padded_h, padded_w), dtype=np.float32)
    padded_image[:, :image.shape[1], :image.shape[2]] = image

    return padded_image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MODEL_URL = "https://github.com/AK391/models/raw/main/vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-10.onnx"
MODEL_PATH = "MaskRCNN-10.onnx"

# Download the model only when it is missing. An unconditional wget fetches
# the file again on every start and, because the target already exists,
# stores the new copy under a numbered name (MaskRCNN-10.onnx.1, ...).
if not os.path.exists(MODEL_PATH):
    # Download to a temporary name so an interrupted transfer is never
    # mistaken for a complete model on the next start.
    partial_path = MODEL_PATH + ".part"
    status = os.system("wget -O {} {}".format(partial_path, MODEL_URL))
    if status != 0:
        raise RuntimeError(
            "Downloading {} failed (wget exit status {})".format(MODEL_URL, status)
        )
    os.replace(partial_path, MODEL_PATH)

sess = rt.InferenceSession(MODEL_PATH)

# Output descriptors of the session; their names are passed to sess.run().
outputs = sess.get_outputs()

# One class name per line; the line index is used as the class id.
with open('coco_classes.txt', encoding='utf-8') as class_file:
    classes = [line.rstrip('\n') for line in class_file]

num_classes = len(classes)
|
|
|
def get_palette():
    """Build the flat RGB palette used to colour class-id images.

    Returns:
        A list of ``num_classes * 3`` ints laid out as
        ``[r0, g0, b0, r1, g1, b1, ...]``. Class 0 is black; every other
        class gets a hue of ``class_id / num_classes`` at saturation and
        value 0.75.
    """
    flat_palette = []

    for class_id in range(num_classes):
        if class_id == 0:
            rgb = (0, 0, 0)
        else:
            rgb = hsv_to_rgb((class_id / num_classes, 0.75, 0.75))

        # Scale the unit-range channels to 0..255, truncating toward zero.
        flat_palette.extend(int(channel * 255) for channel in rgb)

    return flat_palette
|
|
|
def colorize(labels):
    """Map an array of class ids to an RGB image using the class palette.

    Args:
        labels: Array of class ids accepted by ``Image.fromarray``.

    Returns:
        A NumPy array holding the RGB rendering of ``labels``.
    """
    # Go through a palettised ("P") image so each class id indexes directly
    # into the palette returned by get_palette().
    indexed_img = Image.fromarray(labels)
    indexed_img = indexed_img.convert('P', colors=num_classes)
    indexed_img.putpalette(get_palette())

    rgb_img = indexed_img.convert('RGB')
    return np.array(rgb_img)
|
|
|
def visualize_output(image, output):
    """Render a per-class score volume as a colour mask and a blended overlay.

    Args:
        image: ``(height, width, channels)`` array of the original picture,
            in the channel order expected by ``Image.fromarray`` (RGB); only
            the first three channels are used.
        output: ``(num_classes, height, width)`` array of per-class scores
            with the same height and width as ``image``.

    Returns:
        A tuple ``(confidence, result_img, blended_img, raw_labels)``:
        the mean over all pixels of the winning class score, the colourised
        label image (PIL), the 50/50 blend of picture and label image (PIL),
        and the ``uint8`` array of per-pixel class ids.

    Raises:
        ValueError: If the shapes of ``image`` and ``output`` do not match
            the layout described above.
    """
    # Explicit checks instead of assert, which is stripped under ``python -O``.
    if image.ndim != 3 or output.ndim != 3:
        raise ValueError(
            "expected image (H, W, C) and output (num_classes, H, W), got "
            "shapes {} and {}".format(image.shape, output.shape)
        )
    if image.shape[0] != output.shape[1] or image.shape[1] != output.shape[2]:
        raise ValueError(
            "image size {} does not match output size {}".format(
                image.shape[:2], output.shape[1:3]
            )
        )
    if output.shape[0] != num_classes:
        raise ValueError(
            "output has {} classes, expected {}".format(output.shape[0], num_classes)
        )

    # Winning class per pixel.
    raw_labels = np.argmax(output, axis=0).astype(np.uint8)

    # Mean of the winning score over all pixels.
    confidence = float(np.max(output, axis=0).mean())

    # Colour-coded label image (RGB).
    result_img = colorize(raw_labels)

    # Blend the picture with the label image. The picture is used in its
    # given channel order: reversing it here would swap red and blue in the
    # result, because the overlay is RGB and the blend is handed straight to
    # Image.fromarray. Extra channels (e.g. alpha) are dropped so both
    # operands have three channels.
    photo = np.ascontiguousarray(image[:, :, :3])
    blended_img = cv2.addWeighted(photo, 0.5, result_img, 0.5, 0)

    result_img = Image.fromarray(result_img)
    blended_img = Image.fromarray(blended_img)

    return confidence, result_img, blended_img, raw_labels
|
|
|
def _paste_instance_masks(boxes, labels, scores, masks, height, width,
                          score_threshold=0.7):
    """Rasterise instance detections into a (height, width) map of class ids.

    ``boxes`` must already be in the coordinates of the target image. Pixels
    not covered by any kept detection stay 0.
    """
    label_map = np.zeros((height, width), dtype=np.uint8)

    # Paste in ascending score order so the most confident instance wins
    # wherever masks overlap.
    for idx in np.argsort(scores):
        if scores[idx] <= score_threshold:
            continue

        x_min, y_min, x_max, y_max = (int(value) for value in boxes[idx])
        box_w = max(x_max - x_min + 1, 1)
        box_h = max(y_max - y_min + 1, 1)

        # Stretch the fixed-size soft mask to the size of its box.
        soft_mask = cv2.resize(masks[idx][0], (box_w, box_h))

        # Clip the box to the image and skip boxes that fall outside it.
        x0, x1 = max(x_min, 0), min(x_max + 1, width)
        y0, y1 = max(y_min, 0), min(y_max + 1, height)
        if x0 >= x1 or y0 >= y1:
            continue

        inside = soft_mask[y0 - y_min:y1 - y_min, x0 - x_min:x1 - x_min] > 0.5
        label_map[y0:y1, x0:x1][inside] = int(labels[idx])

    return label_map


def inference(img):
    """Segment the image at path ``img`` and return the blended overlay.

    Args:
        img: Path of the image file to process.

    Returns:
        A PIL image: the original picture blended 50/50 with a colour mask
        in which every detected instance is painted in its class colour.

    Raises:
        ValueError: If the model does not return the four expected outputs.
    """
    # Force RGB so the picture always has exactly three channels, matching
    # the three-channel overlay it is blended with below.
    with Image.open(img) as opened:
        input_image = opened.convert("RGB")
    orig_tensor = np.asarray(input_image)
    height, width = orig_tensor.shape[:2]

    # preprocess() returns a NumPy array, so the torch-only calls
    # (.unsqueeze / .detach().cpu().numpy()) must not be applied to it.
    # NOTE(review): the array is fed without a batch axis, as documented for
    # the ONNX Model Zoo Mask R-CNN -- confirm against sess.get_inputs().
    input_tensor = preprocess(input_image)

    input_name = sess.get_inputs()[0].name
    output_names = [output.name for output in outputs]
    detections = sess.run(output_names, {input_name: input_tensor})

    # NOTE(review): assumes the outputs are (boxes, labels, scores, masks),
    # as documented for the ONNX Model Zoo Mask R-CNN. They are instance
    # detections, not the per-class score volume visualize_output() expects.
    if len(detections) != 4:
        raise ValueError(
            "expected 4 model outputs (boxes, labels, scores, masks), got {}".format(
                len(detections)
            )
        )
    boxes, labels, scores, masks = detections

    # Boxes refer to the resized image; undo the scale applied in preprocess().
    ratio = 800.0 / min(width, height)
    label_map = _paste_instance_masks(
        boxes / ratio, labels, scores, masks, height, width
    )

    overlay = colorize(label_map)
    blended_img = cv2.addWeighted(orig_tensor, 0.5, overlay, 0.5, 0)
    return Image.fromarray(blended_img)
|
|
|
# Texts and example input shown in the Gradio UI.
title = "Mask R-CNN"
description = "This model is a real-time neural network for object instance segmentation that detects 80 different classes."
examples = [["examplefcn.png"]]

# The UI passes the uploaded image to inference() as a file path and shows
# the PIL image it returns.
gr.Interface(
    fn=inference,
    inputs=gr.inputs.Image(type="filepath"),
    outputs=gr.outputs.Image(type="pil"),
    title=title,
    description=description,
    examples=examples,
).launch(enable_queue=True)