# YOLOv3-based human detection demo script.
from __future__ import division
import time
import torch
import numpy as np
import cv2
import os
import sys
import random
import pickle as pkl
import argparse
from util import *
from darknet import Darknet
from preprocess import letterbox_image
import preprocess
cur_dir = os.path.dirname(os.path.realpath(__file__))
project_root = os.path.join(cur_dir, '../../../')
chk_root = os.path.join(project_root, 'checkpoint/')
data_root = os.path.join(project_root, 'data/')
sys.path.insert(0, project_root)
sys.path.pop(0)
def prep_image(img, inp_dim):
    """
    Prepare a BGR image for the network.

    Resizes to (inp_dim, inp_dim), converts BGR->RGB and HWC->CHW, and
    normalizes to [0, 1] as a 1xCxHxW float tensor.

    Returns (tensor, original_image, (orig_width, orig_height)).
    """
    original = img
    orig_dim = original.shape[1], original.shape[0]
    resized = cv2.resize(original, (inp_dim, inp_dim))
    # BGR -> RGB, then HWC -> CHW; copy() makes the strides contiguous.
    chw = resized[:, :, ::-1].transpose((2, 0, 1)).copy()
    tensor = torch.from_numpy(chw).float().div(255.0).unsqueeze(0)
    return tensor, original, orig_dim
def write(x, img, colors):
    """
    Draw a red bounding box with a 'People 0' label on img, in place.

    x holds the box as [x1, y1, x2, y2, ...]; colors is accepted for
    interface compatibility but not used (the color is fixed red).
    Returns the annotated image.
    """
    coords = [int(v) for v in x]
    top_left = (coords[0], coords[1])
    bottom_right = (coords[2], coords[3])
    label = 'People {}'.format(0)
    box_color = (0, 0, 255)
    cv2.rectangle(img, top_left, bottom_right, box_color, 2)
    text_w, text_h = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    # Filled background rectangle for the label text.
    label_corner = top_left[0] + text_w + 3, top_left[1] + text_h + 4
    cv2.rectangle(img, top_left, label_corner, box_color, -1)
    cv2.putText(img, label, (top_left[0], top_left[1] + text_h + 4),
                cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
    return img
def arg_parse():
    """
    Parse arguments for the detection module.

    Returns the argparse.Namespace of parsed command-line options.
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
    parser.add_argument('--confidence', dest='confidence', type=float, default=0.70,
                        help='Object Confidence to filter predictions')
    parser.add_argument('--nms-thresh', dest='nms_thresh', type=float, default=0.4, help='NMS Threshold')
    parser.add_argument('--reso', dest='reso', default=416, type=int,
                        help='Input resolution of the network. '
                             'Increase to increase accuracy. Decrease to increase speed. (160, 416)')
    parser.add_argument('-wf', '--weight-file', type=str, default=chk_root + 'yolov3/yolov3.weights',
                        help='The path of model weight file')
    # Fixed help text: this is the config file, not the weight file.
    parser.add_argument('-cf', '--cfg-file', type=str, default=cur_dir + '/cfg/yolov3.cfg',
                        help='The path of model config file')
    parser.add_argument('-a', '--animation', action='store_true', help='output animation')
    parser.add_argument('-v', '--video', type=str, default='camera', help='The input video path')
    # Fixed help text: this option takes an image path, not a video path.
    parser.add_argument('-i', '--image', type=str, default=cur_dir + '/data/dog-cycle-car.png',
                        help='The input image path')
    parser.add_argument('-np', '--num-person', type=int, default=1,
                        help='number of estimated human poses. [1, 2]')
    return parser.parse_args()
def load_model(args=None, CUDA=None, inp_dim=416):
    """
    Build the Darknet model from args.cfg_file, load args.weight_file,
    move it to GPU when available, and switch to eval mode.

    inp_dim must be a multiple of 32 and greater than 32.
    Returns the ready-to-use model.
    """
    if args is None:
        args = arg_parse()
    if CUDA is None:
        CUDA = torch.cuda.is_available()

    net = Darknet(args.cfg_file)
    net.load_weights(args.weight_file)
    net.net_info["height"] = inp_dim
    # YOLO downsamples by 32, so the input side must be a multiple of 32.
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        net.cuda()
    # Inference only: disable dropout/batch-norm training behavior.
    net.eval()
    return net
def yolo_human_det(img, model=None, reso=416, confidence=0.70):
    """
    Run YOLOv3 human detection on a single image.

    Args:
        img: BGR image array, or a path to an image file.
        model: optional preloaded Darknet model; built on demand when None.
        reso: network input resolution (multiple of 32).
        confidence: objectness threshold passed to write_results.

    Returns:
        (bboxs, scores): numpy arrays of shape (N, 4) and (N, 1), with boxes
        rescaled and clamped to the original image size; (None, None) when
        nothing is detected.
    """
    args = arg_parse()
    inp_dim = reso
    num_classes = 80

    CUDA = torch.cuda.is_available()
    if model is None:
        model = load_model(args, CUDA, inp_dim)

    if type(img) == str:
        assert os.path.isfile(img), 'The image path does not exist'
        img = cv2.imread(img)

    img, ori_img, img_dim = preprocess.prep_image(img, inp_dim)
    img_dim = torch.FloatTensor(img_dim).repeat(1, 2)

    with torch.no_grad():
        if CUDA:
            img_dim = img_dim.cuda()
            img = img.cuda()
        output = model(img, CUDA)
        # write_results returns the int 0 when no detection survives the
        # confidence threshold; guard that before calling len()/size(),
        # otherwise this raises TypeError instead of returning cleanly.
        output = write_results(output, confidence, num_classes, nms=True,
                               nms_conf=args.nms_thresh, det_hm=True)
        if not isinstance(output, torch.Tensor) or len(output) == 0:
            return None, None

        # Undo the resize: map box coordinates back to original image space.
        img_dim = img_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim / img_dim, 1)[0].view(-1, 1)

        output[:, [1, 3]] -= (inp_dim - scaling_factor * img_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (inp_dim - scaling_factor * img_dim[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor

        # Clamp boxes to the original image boundaries.
        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, img_dim[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, img_dim[i, 1])

    bboxs = []
    scores = []
    for i in range(len(output)):
        item = output[i]
        # Round coordinates to 2 decimals; float() avoids numpy scalar quirks.
        bbox = [round(float(v), 2) for v in item[1:5].cpu().numpy()]
        score = item[5].cpu().numpy()
        bboxs.append(bbox)
        scores.append(score)
    scores = np.expand_dims(np.array(scores), 1)
    bboxs = np.array(bboxs)

    return bboxs, scores