Spaces:
Runtime error
Runtime error
File size: 20,272 Bytes
ac6e446 268b2ef fa86b15 0436c65 fa86b15 ac6e446 e2a0a8f ac6e446 4eff630 ac6e446 59aac49 f3a6719 ac6e446 4eff630 ac6e446 8d0a38e 726b7df ac6e446 e2a0a8f 4ebfc28 ac6e446 3a78219 ac6e446 9890183 ac6e446 9890183 ac6e446 941336a ac6e446 c3716b2 ac6e446 40b826b ac6e446 40b826b ac6e446 40b826b ac6e446 f11a2de ac6e446 59aac49 ac6e446 1a54db0 ac6e446 5eb701d ad1a4bb 1a54db0 40b826b 1a54db0 ac6e446 59aac49 4eff630 30d8e51 9dc1e33 23abd94 1a54db0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 |
import argparse
import cv2
import os
# limit the number of cpus used by high performance libraries
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
import sys
import platform
import numpy as np
from pathlib import Path
import torch
import torch.backends.cudnn as cudnn
# Locate this script on disk; the directory that contains it is the project root.
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]  # project root directory (absolute at this point)
WEIGHTS = ROOT / 'weights'  # folder the default model weights are looked up in

# Make the project root, the bundled yolov8 package and the strongsort tracker
# importable. Entries are appended (not prepended) and only when missing.
for _import_dir in (ROOT, ROOT / 'yolov8', ROOT / 'trackers' / 'strongsort'):
    _import_dir = str(_import_dir)
    if _import_dir not in sys.path:
        sys.path.append(_import_dir)
del _import_dir  # do not leak the loop variable into the module namespace

# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
import logging
from ultralytics.nn.autobackend import AutoBackend
from ultralytics.yolo.data.dataloaders.stream_loaders import LoadImages, LoadStreams
from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS
from ultralytics.yolo.utils import DEFAULT_CFG, LOGGER, SETTINGS, callbacks, colorstr, ops
from ultralytics.yolo.utils.checks import check_file, check_imgsz, check_imshow, print_args, check_requirements
from ultralytics.yolo.utils.files import increment_path
from ultralytics.yolo.utils.torch_utils import select_device
from ultralytics.yolo.utils.ops import Profile, non_max_suppression, scale_boxes, process_mask, process_mask_native
from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box
from trackers.multi_tracker_zoo import create_tracker
@torch.no_grad()
def run(
    source='0',
    yolo_weights=WEIGHTS / 'yolov5m.pt',  # model.pt path(s),
    reid_weights=WEIGHTS / 'osnet_x0_25_msmt17.pt',  # model.pt path,
    tracking_method='strongsort',
    tracking_config=None,
    imgsz=(640, 640),  # inference size (height, width)
    conf_thres=0.25,  # confidence threshold
    iou_thres=0.45,  # NMS IOU threshold
    max_det=1000,  # maximum detections per image
    device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    show_vid=False,  # show results
    save_txt=False,  # save results to *.txt
    save_conf=False,  # save confidences in --save-txt labels
    save_crop=False,  # save cropped prediction boxes
    save_trajectories=False,  # save trajectories for each track
    save_vid=True,  # save video tracking results
    nosave=False,  # do not save images/videos
    classes=None,  # filter by class: --class 0, or --class 0 2 3
    agnostic_nms=False,  # class-agnostic NMS
    augment=False,  # augmented inference
    visualize=False,  # visualize features
    update=False,  # update all models
    project=ROOT,  # save results to project/name
    name='exp',  # save results to project/name
    exist_ok=True,  # existing project/name ok, do not increment
    line_thickness=2,  # bounding box thickness (pixels)
    hide_labels=False,  # hide labels
    hide_conf=False,  # hide confidences
    hide_class=False,  # hide IDs
    half=False,  # use FP16 half-precision inference
    dnn=False,  # use OpenCV DNN for ONNX inference
    vid_stride=1,  # video frame-rate stride
    retina_masks=False,  # build masks at native image resolution (segmentation weights only)
):
    """Detect objects in ``source`` with a YOLO model and track them frame by frame.

    The detector is loaded through ``AutoBackend`` and one tracker per input
    stream is built with ``create_tracker``. Every batch from the dataloader is
    pre-processed, run through the model and NMS, rescaled to the original frame
    size and handed to the tracker. Depending on the flags the tracked boxes are
    drawn, shown in a window, written as MOT-style text files, cropped to disk
    and/or encoded into a video inside ``project/name``.

    Notes:
        * For webcam/stream and video-file sources the annotated video is always
          written as ``out.mp4`` inside the save directory.
        * ``save_conf`` is accepted for interface compatibility but is not read
          by this function; ``nosave`` only feeds the unused ``save_img`` local.
        * Weights whose path contains ``-seg`` are treated as segmentation models.

    Returns:
        None. Results are reported through the logger and written to disk.
    """
    # print the inputs
    print(f"model used : {yolo_weights}, tracking method : {tracking_method}")
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    if not isinstance(yolo_weights, list):  # single yolo model
        exp_name = yolo_weights.stem
    elif type(yolo_weights) is list and len(yolo_weights) == 1:  # single models after --yolo_weights
        exp_name = Path(yolo_weights[0]).stem
    else:  # multiple models after --yolo_weights
        exp_name = 'ensemble'
    exp_name = name if name else exp_name + "_" + reid_weights.stem
    save_dir = increment_path(Path(project) / exp_name, exist_ok=exist_ok)  # increment run
    (save_dir / 'tracks' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    is_seg = '-seg' in str(yolo_weights)
    model = AutoBackend(yolo_weights, device=device, dnn=dnn, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_imgsz(imgsz, stride=stride)  # check image size

    # Dataloader
    bs = 1
    if webcam:
        show_vid = check_imshow(warn=True)
        dataset = LoadStreams(
            source,
            imgsz=imgsz,
            stride=stride,
            auto=pt,
            transforms=getattr(model.model, 'transforms', None),
            vid_stride=vid_stride
        )
        bs = len(dataset)
    else:
        dataset = LoadImages(
            source,
            imgsz=imgsz,
            stride=stride,
            auto=pt,
            transforms=getattr(model.model, 'transforms', None),
            vid_stride=vid_stride
        )
    vid_path, vid_writer, txt_path = [None] * bs, [None] * bs, [None] * bs
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup

    # Create as many tracker instances as there are video sources
    tracker_list = []
    for i in range(bs):
        tracker = create_tracker(tracking_method, tracking_config, reid_weights, device, half)
        tracker_list.append(tracker)
        if hasattr(tracker_list[i], 'model'):
            if hasattr(tracker_list[i].model, 'warmup'):
                tracker_list[i].model.warmup()
    outputs = [None] * bs

    # Run tracking
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile(), Profile())
    curr_frames, prev_frames = [None] * bs, [None] * bs
    for frame_idx, batch in enumerate(dataset):
        path, im, im0s, vid_cap, s = batch
        visualize = increment_path(save_dir / Path(path[0]).stem, mkdir=True) if visualize else False
        with dt[0]:
            im = torch.from_numpy(im).to(device)
            im = im.half() if half else im.float()  # uint8 to fp16/32
            im /= 255.0  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            preds = model(im, augment=augment, visualize=visualize)

        # Apply NMS
        with dt[2]:
            if is_seg:
                masks = []
                p = non_max_suppression(preds[0], conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)
                proto = preds[1][-1]
            else:
                p = non_max_suppression(preds, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        # Process detections
        filename = 'out.mp4'  # fixed output name used for stream and video-file sources
        for i, det in enumerate(p):  # detections per image
            seen += 1
            if webcam:  # bs >= 1
                p, im0, _ = path[i], im0s[i].copy(), dataset.count
                p = Path(p)  # to Path
                s += f'{i}: '
                txt_file_name = p.name
                save_path = str(save_dir / filename)  # im.jpg, vid.mp4, ...
            else:
                p, im0, _ = path, im0s.copy(), getattr(dataset, 'frame', 0)
                p = Path(p)  # to Path
                # video file
                if source.endswith(VID_FORMATS):
                    txt_file_name = p.stem
                    save_path = str(save_dir / filename)  # im.jpg, vid.mp4, ...
                    LOGGER.info(f"p.name is {p.name}, save_path value is {save_path}")
                # folder with imgs
                else:
                    txt_file_name = p.parent.name  # get folder name containing current img
                    save_path = str(save_dir / p.parent.name)  # im.jpg, vid.mp4, ...
            curr_frames[i] = im0

            txt_path = str(save_dir / 'tracks' / txt_file_name)  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            imc = im0.copy() if save_crop else im0  # for save_crop

            annotator = Annotator(im0, line_width=line_thickness, example=str(names))

            if hasattr(tracker_list[i], 'tracker') and hasattr(tracker_list[i].tracker, 'camera_update'):
                if prev_frames[i] is not None and curr_frames[i] is not None:  # camera motion compensation
                    tracker_list[i].tracker.camera_update(prev_frames[i], curr_frames[i])

            if det is not None and len(det):
                if is_seg:
                    shape = im0.shape
                    # scale bbox first then crop masks
                    if retina_masks:
                        det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], shape).round()  # rescale boxes to im0 size
                        masks.append(process_mask_native(proto[i], det[:, 6:], det[:, :4], im0.shape[:2]))  # HWC
                    else:
                        masks.append(process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True))  # HWC
                        det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], shape).round()  # rescale boxes to im0 size
                else:
                    det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()  # rescale boxes to im0 size

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # pass detections to the tracker
                with dt[3]:
                    outputs[i] = tracker_list[i].update(det.cpu(), im0)

                # draw boxes for visualization
                if len(outputs[i]) > 0:
                    if is_seg:
                        # Mask plotting.
                        # `masks` only grows for images that have detections, so the entry
                        # for the current image is always the last one appended; indexing
                        # by `i` breaks as soon as an earlier image in the batch was empty.
                        annotator.masks(
                            masks[-1],
                            colors=[colors(x, True) for x in det[:, 5]],
                            im_gpu=torch.as_tensor(im0, dtype=torch.float16).to(device).permute(2, 0, 1).flip(0).contiguous() /
                            255 if retina_masks else im[i]
                        )

                    for output in outputs[i]:
                        bbox = output[0:4]
                        track_id = output[4]
                        cls = output[5]
                        conf = output[6]

                        if save_txt:
                            # to MOT format
                            bbox_left = output[0]
                            bbox_top = output[1]
                            bbox_w = output[2] - output[0]
                            bbox_h = output[3] - output[1]
                            # Write MOT compliant results to file
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * 10 + '\n') % (frame_idx + 1, track_id, bbox_left,  # MOT format
                                                               bbox_top, bbox_w, bbox_h, -1, -1, -1, i))

                        if save_vid or save_crop or show_vid:  # Add bbox/seg to image
                            c = int(cls)  # integer class
                            track_id = int(track_id)  # integer id
                            label = None if hide_labels else (f'{track_id} {names[c]}' if hide_conf else \
                                (f'{track_id} {conf:.2f}' if hide_class else f'{track_id} {names[c]} {conf:.2f}'))
                            color = colors(c, True)
                            annotator.box_label(bbox, label, color=color)

                            if save_trajectories and tracking_method == 'strongsort':
                                q = output[7]
                                tracker_list[i].trajectory(im0, q, color=color)
                            if save_crop:
                                txt_file_name = txt_file_name if (isinstance(path, list) and len(path) > 1) else ''
                                save_one_box(np.array(bbox, dtype=np.int16), imc, file=save_dir / 'crops' / txt_file_name / names[c] / f'{track_id}' / f'{p.stem}.jpg', BGR=True)
            else:
                pass
                # tracker_list[i].tracker.pred_n_update_all_tracks()

            # Stream results
            im0 = annotator.result()
            if show_vid:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # 1 millisecond
                    exit()

            # Save results (image with detections)
            if save_vid:
                LOGGER.info(f"vid_path, save_path {vid_path[i]}{save_path}")
                if vid_path[i] != save_path:  # new video
                    vid_path[i] = save_path
                    if isinstance(vid_writer[i], cv2.VideoWriter):
                        vid_writer[i].release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                    save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                    LOGGER.info(f"test Results saved to {colorstr('bold', save_path)}")
                    vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                vid_writer[i].write(im0)

            prev_frames[i] = curr_frames[i]

        # Print total time (preprocessing + inference + NMS + tracking)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{sum([d.dt for d in dt if hasattr(d, 'dt')]) * 1E3:.1f}ms")

    # Close every output video explicitly so the file is complete on disk
    # before a caller (e.g. a re-encoding step) opens it.
    for writer in vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    # Print results
    t = tuple(x.t / max(seen, 1) * 1E3 for x in dt)  # speeds per image (guard against zero processed images)
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS, %.1fms {tracking_method} update per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_vid:
        s = f"\n{len(list((save_dir / 'tracks').glob('*.txt')))} tracks saved to {save_dir / 'tracks'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # strip_optimizer is not imported at module level, so import it where it is needed.
        from ultralytics.yolo.utils.torch_utils import strip_optimizer
        weight_files = yolo_weights if isinstance(yolo_weights, list) else [yolo_weights]
        for weight_file in weight_files:
            strip_optimizer(weight_file)  # update model (to fix SourceChangeWarning)
def parse_opt():
    """Build the command-line parser shared by the tracking entry points.

    The parser is returned *unparsed* on purpose: the ``--yolo-weights``,
    ``--tracking-method`` and ``--source`` options are not declared here and
    are expected to be added by the caller (see ``MOT``) before it calls
    ``parse_args()``. Expanding ``imgsz`` and deriving ``tracking_config``
    are likewise left to the caller.

    Returns:
        argparse.ArgumentParser: parser pre-populated with the common options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--reid-weights', type=Path, default=WEIGHTS / 'osnet_x0_25_msmt17.pt')
    parser.add_argument('--tracking-config', type=Path, default=None)
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.5, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--show-vid', action='store_true', help='display tracking video results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--save-trajectories', action='store_true', help='save trajectories for each track')
    # NOTE: store_true combined with default=True means video saving is always on.
    parser.add_argument('--save-vid', action='store_true', default=True, help='save video tracking results')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    # class indices depend on the model; for COCO-trained weights 0 is person, 1 is bicycle, 2 is car, ...
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT, help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to ROOT')
    # The default used to be the *string* 'True'; a real boolean keeps the same
    # always-on behaviour while giving the option a consistent type.
    parser.add_argument('--exist-ok', default=True, action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=2, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--hide-class', default=False, action='store_true', help='hide IDs')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    parser.add_argument('--retina-masks', action='store_true', help='whether to plot masks in native resolution')
    return parser
def main(opt):
    """Check the project's requirements file, then start tracking.

    Args:
        opt: parsed options namespace; each attribute is forwarded to ``run``
            as a keyword argument of the same name.
    """
    requirements_file = ROOT / 'requirements.txt'
    skipped_packages = ('tensorboard', 'thop')
    check_requirements(requirements=requirements_file, exclude=skipped_packages)

    run_kwargs = vars(opt)
    run(**run_kwargs)
#if __name__ == "__main__":
# opt = parse_opt()
# main(opt)
def MOT(yoloweights, trackingmethod, sourceVideo):
    """Track objects in a video and return the path of the re-encoded result.

    Completes the shared parser from ``parse_opt`` with the three options it
    leaves out, using the arguments as their defaults. It then runs the
    tracking pipeline through ``main`` and transcodes the produced ``out.mp4``
    to H.264 at 30 fps with ffmpeg.

    Args:
        yoloweights: default for ``--yolo-weights`` (detector weights path).
        trackingmethod: default for ``--tracking-method``; also selects the
            tracker config ``trackers/<method>/configs/<method>.yaml``.
        sourceVideo: default for ``--source`` (file/dir/URL/glob, 0 for webcam).

    Returns:
        str: ``'output.mp4'``, the path the re-encoded video is written to.
        The path is returned even if ffmpeg fails; a warning is logged then.
    """
    import subprocess  # function-scope import: only needed for the re-encoding step

    parser = parse_opt()
    parser.add_argument('--yolo-weights', nargs='+', type=Path, default=yoloweights, help='model.pt path(s)')
    parser.add_argument('--tracking-method', type=str, default=trackingmethod, help='strongsort, ocsort, bytetrack')
    parser.add_argument('--source', type=str, default=sourceVideo, help='file/dir/URL/glob, 0 for webcam')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    opt.tracking_config = ROOT / 'trackers' / opt.tracking_method / 'configs' / (opt.tracking_method + '.yaml')
    print_args(vars(opt))
    main(opt)

    # Look for the result where run() actually wrote it (project/name) instead of
    # assuming an 'exp' folder relative to the current working directory.
    save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=True)
    raw_video = os.path.join(save_dir, 'out.mp4')
    outpath = 'output.mp4'
    if os.path.exists(outpath):
        os.remove(outpath)  # remove any stale result so ffmpeg never has to ask about overwriting

    # Argument list instead of a shell string: no quoting problems with unusual paths.
    command = ['ffmpeg', '-i', raw_video, '-vf', 'fps=30', '-vcodec', 'libx264', outpath]
    print(' '.join(command))
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # e.g. ffmpeg is not installed; keep the original best-effort behaviour
        LOGGER.warning(f"ffmpeg could not be started: {err}")
    else:
        if completed.returncode != 0:
            LOGGER.warning(f"ffmpeg exited with status {completed.returncode}")
    return outpath