import tempfile
from collections import defaultdict

import cv2
import gradio as gr
import numpy as np
import torch
from ultralytics import YOLO, solutions
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)
# Load MiDaS model for depth estimation
midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
midas.to(device)
midas.eval()
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms").small_transform
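# Note: MiDaS_small predicts relative (inverse) depth, not metric depth; the
# per-meter scaling applied further below is a rough, scene-specific heuristic
# rather than a true calibration.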
# Load YOLO model
model = YOLO('yolov8x.pt')
names = model.model.names
model.to(device)
pixels_per_meter = 300
unattended_threshold = 2.0 # meters
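# Both constants are deployment-specific assumptions: pixels_per_meter depends
# on camera height and field of view and should be calibrated per scene, and
# 2.0 m is the radius within which a person counts as attending the luggage.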
dist_obj = solutions.DistanceCalculation(names=names, view_img=False, pixels_per_meter=pixels_per_meter)
# Set model parameters
model.overrides['conf'] = 0.5 # NMS confidence threshold
model.overrides['iou'] = 0.5 # NMS IoU threshold
model.overrides['agnostic_nms'] = True # NMS class-agnostic
model.overrides['max_det'] = 1000 # maximum number of detections per image
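# conf drops low-confidence boxes before NMS, iou sets the overlap at which
# duplicates are suppressed, and class-agnostic NMS also merges overlapping
# boxes of different classes (e.g. "backpack" vs "suitcase" on one object).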
# Store scores for each person-luggage pair using tracker ID
ownership_scores = defaultdict(lambda: defaultdict(int))
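# ownership_scores[luggage_id][person_id] counts how many sampled frames that
# person spent within unattended_threshold of the luggage. A minimal sketch of
# how an owner could be read out (illustrative only; likely_owner is not part
# of the original pipeline and is unused below):
def likely_owner(luggage_id):
    scores = ownership_scores[luggage_id]
    return max(scores, key=scores.get) if scores else None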
def calculate_distance(depth_map, point1, point2):
    # 2D distance in metres from the DistanceCalculation helper (mm value unused)
    dist_2d_m, _ = dist_obj.calculate_distance(point1, point2)
    # Approximate per-point depth; MiDaS output is relative, so this scaling
    # is heuristic (see note above)
    z1 = depth_map[int(point1[1]), int(point1[0])] / pixels_per_meter
    z2 = depth_map[int(point2[1]), int(point2[0])] / pixels_per_meter
    depth_diff = np.abs(z1 - z2)
    # Combine the image-plane and depth components into a pseudo-3D distance
    return np.sqrt(dist_2d_m ** 2 + depth_diff ** 2)
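# Worked example with illustrative numbers: centroids 150 px apart give a 2D
# component of 150 / 300 = 0.5 m (the helper divides pixel distance by
# pixels_per_meter), and a depth-map difference of 90 units scales to 0.3 m,
# so the combined estimate is sqrt(0.5**2 + 0.3**2) ≈ 0.58 m.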
def process_video(video_source):
    cap = cv2.VideoCapture(video_source)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return []
    owners = {}  # Assigned owners for luggage, keyed by tracker ID
    abandoned_luggages = set()  # Abandoned luggage, keyed by tracker ID
    frame_count = 0
    output_frames = []  # Processed frames to return as video
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        # Sample every 10th frame to keep tracking + depth inference affordable
        if frame_count % 10 != 0:
            continue
        # Track persons (0) and luggage classes: backpack (24), handbag (26), suitcase (28)
        results = model.track(frame, persist=True, classes=[0, 24, 26, 28], show=False)
        frame_ = results[0].plot()
        # MiDaS depth estimation
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_batch = midas_transforms(img).to(device)
        with torch.no_grad():
            prediction = midas(input_batch)
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
        depth_map = prediction.cpu().numpy()
        persons = []
        luggages = []
        for box in results[0].boxes:
            if box.id is None:  # Tracker has not assigned an ID yet
                continue
            track_id = int(box.id)
            centroid = get_centroid(box)
            cls = int(box.cls)
            if cls == 0:
                persons.append((track_id, centroid))
            elif cls in (24, 26, 28):
                luggages.append((track_id, centroid))
        # Accumulate ownership evidence for every person-luggage pair in range
        for person_id, person_centroid in persons:
            for luggage_id, luggage_centroid in luggages:
                distance_m = calculate_distance(depth_map, person_centroid, luggage_centroid)
                if distance_m <= unattended_threshold and luggage_id not in abandoned_luggages:
                    ownership_scores[luggage_id][person_id] += 1
        # Flag luggage as abandoned when no person is within the threshold
        for luggage_id, luggage_centroid in luggages:
            person_in_range = any(
                calculate_distance(depth_map, person_centroid, luggage_centroid) <= unattended_threshold
                for person_id, person_centroid in persons
            )
            if not person_in_range and luggage_id not in abandoned_luggages:
                abandoned_luggages.add(luggage_id)
        # Visualization
        for box in results[0].boxes:
            xyxy = box.xyxy[0].cpu().numpy().astype(int)
            cv2.rectangle(frame_, (xyxy[0], xyxy[1]), (xyxy[2], xyxy[3]), (0, 255, 0), 2)
            centroid = get_centroid(box)
            cv2.circle(frame_, (int(centroid[0]), int(centroid[1])), 5, (0, 255, 0), -1)
        output_frames.append(frame_)
    cap.release()
    return output_frames
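# Note: luggage is flagged as abandoned the first time no person is within
# unattended_threshold in a single sampled frame. A production system would
# likely debounce this over several seconds and consult ownership_scores
# before raising an alert.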
def get_centroid(box):
    return dist_obj.calculate_centroid(box.xyxy[0].cpu().numpy().astype(int))

def video_interface(video):
    # Gradio's "video" output expects a file path, so encode the processed
    # frames to a temporary MP4 instead of returning a single frame.
    frames = process_video(video)
    if not frames:
        return None
    h, w = frames[0].shape[:2]
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    # 3 fps assumes a ~30 fps source sampled every 10th frame
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), 3.0, (w, h))
    for f in frames:
        writer.write(f)
    writer.release()
    return out_path

# Create a Gradio interface
interface = gr.Interface(fn=video_interface, inputs="video", outputs="video", title="Abandoned Object Detection")

if __name__ == "__main__":
    interface.launch()
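# On Hugging Face Spaces, launch() serves the app directly; when running
# locally, share=True could be passed to expose a temporary public URL.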