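# Gradio app: runs a YOLO model with ByteTrack tracking on an uploaded video,
# counts the tracked objects inside a polygon zone, and returns the annotated video.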
import os
import supervision
from ultralytics import YOLO
from dataclasses import dataclass
from onemetric.cv.utils.iou import box_iou_batch
from supervision import Point
from supervision import Detections, BoxAnnotator
from supervision import draw_text
from supervision import Color
from supervision import VideoInfo
from supervision import get_video_frames_generator
from supervision import VideoSink
# ByteTrack is installed at runtime; the yolox imports below depend on it
os.system("pip install git+https://github.com/ifzhang/ByteTrack")
from typing import List
import numpy as np
import gradio as gr
from tqdm import tqdm
import yolox
# remaining ByteTrack dependencies, plus an editable install of the local package
os.system("pip3 install cython_bbox gdown 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'")
os.system("pip3 install -v -e .")
from yolox.tracker.byte_tracker import BYTETracker, STrack
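# model weights, output video path, class ids to keep, and the Gradio example input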
MODEL = "./best.pt"
TARGET_VIDEO_PATH = "test.mp4"
CLASS_ID = [0,1,2,3,4,5,6]
video_examples = [['example.mp4']]
model = YOLO(MODEL)
model.fuse()
classes = CLASS_ID
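# arguments expected by the BYTETracker constructor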
@dataclass(frozen=True)
class BYTETrackerArgs:
    track_thresh: float = 0.25
    track_buffer: int = 30
    match_thresh: float = 0.8
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0
    mot20: bool = False
# converts Detections into the (x1, y1, x2, y2, confidence) format consumed by BYTETracker.update
def detections2boxes(detections: Detections) -> np.ndarray:
    return np.hstack((
        detections.xyxy,
        detections.confidence[:, np.newaxis]
    ))
# converts List[STrack] into format that can be consumed by match_detections_with_tracks function
def tracks2boxes(tracks: List[STrack]) -> np.ndarray:
    return np.array([
        track.tlbr
        for track in tracks
    ], dtype=float)
# matches detections with tracks by IoU and returns a tracker id (or None) for each detection
def match_detections_with_tracks(
    detections: Detections,
    tracks: List[STrack]
) -> List:
    if not np.any(detections.xyxy) or len(tracks) == 0:
        # nothing to match: leave every detection without a tracker id
        return [None] * len(detections)
    tracks_boxes = tracks2boxes(tracks=tracks)
    iou = box_iou_batch(tracks_boxes, detections.xyxy)
    track2detection = np.argmax(iou, axis=1)
    tracker_ids = [None] * len(detections)
    for tracker_index, detection_index in enumerate(track2detection):
        if iou[tracker_index, detection_index] != 0:
            tracker_ids[detection_index] = tracks[tracker_index].track_id
    return tracker_ids
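# run detection + ByteTrack tracking over every frame, count tracked objects inside
# the polygon zone, and write the annotated video to TARGET_VIDEO_PATH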
def ObjectDetection(video_path):
    byte_tracker = BYTETracker(BYTETrackerArgs())
    video_info = VideoInfo.from_video_path(video_path)
    generator = get_video_frames_generator(video_path)
    box_annotator = BoxAnnotator(thickness=5, text_thickness=5, text_scale=1)
    # polygon (pixel coordinates)
    polygon = np.array([[200, 300], [200, 1420], [880, 1420], [880, 300]])
    # zone
    zone = supervision.PolygonZone(polygon=polygon, frame_resolution_wh=video_info.resolution_wh)
    # zone annotator
    zone_annotator = supervision.PolygonZoneAnnotator(zone=zone, color=Color.white(), thickness=4)
    # open target video file
    with VideoSink(TARGET_VIDEO_PATH, video_info) as sink:
        # loop over video frames
        for frame in tqdm(generator, total=video_info.total_frames):
            results = model(frame)
            detections = Detections(
                xyxy=results[0].boxes.xyxy.cpu().numpy(),
                confidence=results[0].boxes.conf.cpu().numpy(),
                class_id=results[0].boxes.cls.cpu().numpy().astype(int)
            )
            # filtering out detections with unwanted classes
            detections = detections[np.isin(detections.class_id, CLASS_ID)]
            # tracking detections
            tracks = byte_tracker.update(
                output_results=detections2boxes(detections=detections),
                img_info=frame.shape,
                img_size=frame.shape
            )
            tracker_id = match_detections_with_tracks(detections=detections, tracks=tracks)
            detections.tracker_id = np.array(tracker_id)
            # filtering out detections without trackers
            detections = detections[np.not_equal(detections.tracker_id, None)]
            # keep only detections inside the polygon zone
            mask = zone.trigger(detections=detections)
            detections_filtered = detections[mask]
            # format custom labels for the remaining detections
            labels = [
                f"#{tracker_id} {classes[class_id]} {confidence:0.2f}"
                for _, _, confidence, class_id, tracker_id
                in detections_filtered
            ]
            # per-class counts inside the zone
            t = np.unique(detections_filtered.class_id, return_counts=True)
            for class_id, count in zip(t[0], t[1]):
                frame = draw_text(
                    background_color=Color.white(),
                    scene=frame,
                    text=' '.join((str(classes[class_id]), ':', str(count))),
                    text_anchor=Point(x=500, y=1550 + (50 * class_id)),
                    text_scale=2,
                    text_thickness=4
                )
            # annotate frame and write it to the output video
            frame = box_annotator.annotate(scene=frame, detections=detections_filtered, labels=labels)
            frame = zone_annotator.annotate(scene=frame)
            sink.write_frame(frame)
    return TARGET_VIDEO_PATH
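# Gradio UI: accepts an uploaded video and returns the path of the annotated output video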
demo = gr.Interface(
    fn=ObjectDetection,
    inputs=gr.Video(),
    outputs=gr.Video(),
    examples=video_examples,
    cache_examples=False
)
demo.queue().launch()