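# "Runtime error" (appears twice in the captured listing; presumably page-status
# text from wherever this script was displayed, not part of the program)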
import math
import random
import time
from collections import OrderedDict, namedtuple
from pathlib import Path

import cv2
import numpy as np
import onnxruntime as ort
import requests
import torch
import torchvision
from PIL import Image
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """Return the IoU (or a GIoU / DIoU / CIoU variant) of one box against n boxes.

    Args:
        box1: Tensor holding the 4 coordinates of a single box.
        box2: Tensor of shape (n, 4). It is transposed internally so that each
            coordinate becomes one row of length n.
        x1y1x2y2: When True the boxes are (x1, y1, x2, y2) corners; otherwise
            they are (center x, center y, width, height) and are converted.
        GIoU: Return Generalized IoU.
        DIoU: Return Distance IoU.
        CIoU: Return Complete IoU.
        eps: Small constant added to heights, the union and the convex terms
            to avoid division by zero.

    Returns:
        Tensor with one value per row of ``box2``. If several variant flags
        are set, DIoU takes precedence over CIoU, and both over GIoU.
    """
    box2 = box2.T

    # Get the corner coordinates of both boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area (negative overlaps are clamped to zero)
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if not (GIoU or DIoU or CIoU):
        return iou  # plain IoU

    # Smallest enclosing (convex) box
    cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex width
    ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height

    if CIoU or DIoU:  # Distance or Complete IoU
        c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
        rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
        if DIoU:
            return iou - rho2 / c2  # DIoU
        # CIoU: DIoU plus an aspect-ratio consistency penalty
        v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2)
        with torch.no_grad():  # alpha is treated as a constant weight
            alpha = v / (v - iou + (1 + eps))
        return iou - (rho2 / c2 + v * alpha)  # CIoU

    # GIoU
    c_area = cw * ch + eps  # convex area
    return iou - (c_area - union) / c_area
def xywh2xyxy(x):
    """Convert boxes from [x, y, w, h] to [x1, y1, x2, y2].

    (x, y) is the box center; (x1, y1) is the top-left and (x2, y2) the
    bottom-right corner. The last axis of ``x`` holds the coordinates, so both
    an (n, 4) batch and a single 1-D box are accepted.

    Args:
        x: ``torch.Tensor`` or array-like accepted by ``np.copy``.

    Returns:
        A new tensor/array of the same shape as ``x``; the input is not modified.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = x[..., 0] - half_w  # top left x
    y[..., 1] = x[..., 1] - half_h  # top left y
    y[..., 2] = x[..., 0] + half_w  # bottom right x
    y[..., 3] = x[..., 1] + half_h  # bottom right y
    return y
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=()):
    """Run Non-Maximum Suppression (NMS) on inference results.

    Args:
        prediction: Tensor indexed as [image, candidate, 5 + nc]. Columns are
            read as (center x, center y, width, height, objectness, then one
            score per class).
        conf_thres: Minimum objectness, and minimum final confidence, to keep.
        iou_thres: IoU threshold passed to ``torchvision.ops.nms``.
        classes: Optional iterable of class indices to keep; None keeps all.
        agnostic: When True, boxes of different classes suppress each other.
        multi_label: Allow several labels per box (only effective if nc > 1).
        labels: Optional per-image apriori labels; each entry is read as rows
            of (class, x, y, w, h) and appended with confidence 1.0.

    Returns:
        List with one (n, 6) tensor per image, columns [xyxy, conf, cls].
        Images that are skipped or not reached keep an empty (0, 6) tensor.
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096  # (pixels) maximum box width and height, used as per-class offset
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # One independent empty tensor per image (a `[tensor] * n` list would alias one object)
    output = [torch.zeros((0, 6), device=prediction.device) for _ in range(prediction.shape[0])]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence filter; boolean indexing yields a copy

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        if nc == 1:
            # Single-class models: use the objectness directly as the class confidence
            x[:, 5:] = x[:, 4:5]
        else:
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS: shifting boxes by class keeps different classes from overlapping
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = torchvision.ops.box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    The image is scaled by a single ratio so it fits inside ``new_shape``
    without changing its aspect ratio, then padded with ``color``.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width), or one int used for both.
        color: Border value passed to ``cv2.copyMakeBorder``.
        auto: When True, pad only up to the next multiple of ``stride``
            (minimum rectangle) instead of the full ``new_shape``.
        scaleup: When False, the image is only ever scaled down.
        stride: Stride used for the ``auto`` padding computation.

    Returns:
        Tuple ``(im, r, (dw, dh))``: the padded image, the scale ratio that
        was applied, and the padding added on each side along width and
        height (may be fractional; the actual borders are rounded).
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send an odd leftover pixel to the bottom/right side
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, r, (dw, dh)
def get_layout_results(img, onnx_path):
    """Detect layout regions in an image with an ONNX model.

    Args:
        img: Image array in BGR channel order (it is converted to RGB here).
        onnx_path: Path of the ONNX model file to load.

    Returns:
        List of ``[box, score, cls_id, color]`` entries, where ``box`` is
        ``[x0, y0, x1, y1]`` as ints in the coordinates of the input image
        (letterbox padding removed and scaling undone), ``score`` is rounded
        to 3 decimals, ``cls_id`` indexes the class-name list below and
        ``color`` is that class's entry in the color table.

    Raises:
        ValueError: If ``img`` is None (e.g. the image file could not be read).
    """
    if img is None:
        raise ValueError('img is None; the image could not be loaded')

    providers = ['CPUExecutionProvider']
    session = ort.InferenceSession(onnx_path, providers=providers)

    names = ['Articles', 'Advertisement', 'Headlines', 'Sub-headlines', 'Graphics', 'Images', 'Tables', 'Text Block', 'Header']
    # Fixed, easily distinguishable color per class (color names assume RGB order)
    colors = {
        'Articles': [255, 0, 0],  # Red
        'Advertisement': [0, 255, 0],  # Green
        'Headlines': [0, 0, 255],  # Blue
        'Sub-headlines': [255, 255, 0],  # Yellow
        'Graphics': [255, 0, 255],  # Magenta
        'Images': [128, 0, 128],  # Purple
        'Tables': [0, 255, 255],  # Cyan
        'Text Block': [0, 128, 128],  # Teal
        'Header': [0, 0, 0],  # Black
    }

    # Preprocess: BGR -> RGB, letterbox, HWC -> 1xCxHxW float32 scaled to [0, 1]
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image, ratio, dwdh = letterbox(img, auto=False)
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, 0)
    image = np.ascontiguousarray(image)
    im = image.astype(np.float32)
    im /= 255.0

    outname = [o.name for o in session.get_outputs()]
    inname = [i.name for i in session.get_inputs()]
    inp = {inname[0]: im}

    # ONNX inference; only the first output is used.
    # NOTE(review): assumes that output is the raw (batch, candidates, 5 + nc)
    # prediction expected by non_max_suppression - confirm against the export.
    outputs = session.run(outname, inp)[0]

    # convert to torch tensor for NMS
    outputs = torch.from_numpy(outputs)
    det = non_max_suppression(outputs, 0.25, 0.45, classes=None, agnostic=False)[0]  # conf_thres=0.25, iou_thres=0.45

    # Postprocess: undo the letterbox padding and scaling for each detection
    pad = np.array(dwdh * 2)  # (dw, dh, dw, dh)
    results = []
    for x0, y0, x1, y1, score, cls_id in det.tolist():
        box = (np.array([x0, y0, x1, y1]) - pad) / ratio
        box = box.round().astype(np.int32).tolist()
        cls_id = int(cls_id)
        score = round(float(score), 3)
        name = names[cls_id]
        color = colors[name]
        results.append([box, score, cls_id, color])
    return results
if __name__ == '__main__':
    # Hard-coded demo paths: model weights and one sample page image
    onnx_path = "/home/ubuntu/devesh/yolov7/runs/train/yolov7-custom9/weights/best.onnx"
    img_path = '/home/ubuntu/devesh/yolov7/Language_wise_imgs/Hindi/_Dainik_Navajyoti_-_04-11-2023_3.png'
    img_ori = cv2.imread(img_path)
    if img_ori is None:  # cv2.imread signals an unreadable file by returning None
        raise SystemExit(f'Could not read image: {img_path}')
    lines = get_layout_results(img_ori, onnx_path)
    if lines:
        print(lines[0])
    else:  # indexing an empty result list would raise IndexError
        print('No layout regions detected')