Harinvnukala committed · verified
Commit 3a295f2 · 1 Parent(s): 3677256

Upload app.py

Files changed (1): app.py (+131, -0)
app.py ADDED
@@ -0,0 +1,131 @@
import cv2
from ultralytics import YOLO, solutions
import torch
import numpy as np
from collections import defaultdict
import gradio as gr

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# Load MiDaS model for depth estimation
midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
midas.to(device)
midas.eval()
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms").small_transform

# Load YOLO model
model = YOLO('yolov8x.pt')
names = model.model.names
model.to(device)

pixels_per_meter = 300
unattended_threshold = 2.0  # meters

dist_obj = solutions.DistanceCalculation(names=names, view_img=False, pixels_per_meter=pixels_per_meter)

# Set model parameters
model.overrides['conf'] = 0.5  # NMS confidence threshold
model.overrides['iou'] = 0.5  # NMS IoU threshold
model.overrides['agnostic_nms'] = True  # NMS class-agnostic
model.overrides['max_det'] = 1000  # maximum number of detections per image

# Store scores for each person-luggage pair using tracker ID
ownership_scores = defaultdict(lambda: defaultdict(int))

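Note that the script accumulates ownership_scores but never reads it back to assign an owner (the owners dict below also stays empty). A minimal sketch of how an owner could be derived from the accumulated counts; resolve_owner is a hypothetical helper, not part of the committed app:

def resolve_owner(luggage_id):
    # Hypothetical helper: treat as owner the person who spent the most
    # sampled frames within unattended_threshold of this luggage item.
    scores = ownership_scores.get(luggage_id)  # .get() avoids creating empty entries
    if not scores:
        return None
    return max(scores, key=scores.get)
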
def calculate_distance(depth_map, point1, point2):
    # 2-D separation in the image plane, scaled to meters by DistanceCalculation
    dist_2d_m, dist_2d_mm = dist_obj.calculate_distance(point1, point2)
    # MiDaS predicts relative (not metric) depth; dividing by pixels_per_meter
    # applies the same heuristic scale used for the 2-D distance
    z1 = depth_map[int(point1[1]), int(point1[0])] / pixels_per_meter
    z2 = depth_map[int(point2[1]), int(point2[0])] / pixels_per_meter
    depth_diff = np.abs(z1 - z2)
    # Combine the in-plane and depth components (Pythagorean theorem)
    distance = np.sqrt(dist_2d_m ** 2 + depth_diff ** 2)
    return distance

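For example, if DistanceCalculation reports a 1.2 m in-plane separation and the scaled depth difference is 0.5 m, the combined distance is sqrt(1.2^2 + 0.5^2) = sqrt(1.69) ≈ 1.3 m, which still counts as attended under the 2.0 m unattended_threshold.
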
def process_video(video_source):
    cap = cv2.VideoCapture(video_source)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    owners = {}  # Store assigned owners for luggage using tracker ID
    abandoned_luggages = set()  # Store abandoned luggage using tracker ID

    frame_count = 0
    output_frames = []  # Store the processed frames to return as video

    while cap.isOpened():
        ret, frame = cap.read()
        frame_count += 1
        if not ret:
            break
        if frame_count % 10 != 0:  # sample every 10th frame to keep inference tractable
            continue
        # Track persons (0) and luggage classes: backpack (24), handbag (26), suitcase (28)
        results = model.track(frame, persist=True, classes=[0, 28, 24, 26], show=False)
        frame_ = results[0].plot()

        # MiDaS depth estimation
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_batch = midas_transforms(img).to(device)
        with torch.no_grad():
            prediction = midas(input_batch)
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
        depth_map = prediction.cpu().numpy()

        persons = []
        luggages = []
        for box in results[0].boxes:
            centroid = get_centroid(box)
            # box.id is a tensor (or None before the tracker assigns an ID);
            # convert to int so IDs compare and hash consistently across frames
            track_id = int(box.id) if box.id is not None else None
            cls_id = int(box.cls)
            if cls_id == 0:
                persons.append((track_id, centroid))
            elif cls_id in [24, 26, 28]:
                luggages.append((track_id, centroid))

        for person_id, person_centroid in persons:
            for luggage_id, luggage_centroid in luggages:
                distance_m = calculate_distance(depth_map, person_centroid, luggage_centroid)
                if distance_m <= unattended_threshold and luggage_id not in abandoned_luggages:
                    ownership_scores[luggage_id][person_id] += 1

        for luggage_id, luggage_centroid in luggages:
            person_in_range = any(
                calculate_distance(depth_map, person_centroid, luggage_centroid) <= unattended_threshold
                for person_id, person_centroid in persons
            )

            if not person_in_range and luggage_id not in abandoned_luggages:
                abandoned_luggages.add(luggage_id)

        # Visualization
        for box in results[0].boxes:
            xyxy = box.xyxy[0].cpu().numpy().astype(int)
            cv2.rectangle(frame_, (xyxy[0], xyxy[1]), (xyxy[2], xyxy[3]), (0, 255, 0), 2)
            centroid = get_centroid(box)
            cv2.circle(frame_, (int(centroid[0]), int(centroid[1])), 5, (0, 255, 0), -1)

        output_frames.append(frame_)

    cap.release()
    cv2.destroyAllWindows()

    return output_frames

def get_centroid(box):
    return dist_obj.calculate_centroid(box.xyxy[0].cpu().numpy().astype(int))

def video_interface(video):
    processed_frames = process_video(video)
    if not processed_frames:
        return None
    # Gradio's "video" output expects a file path, not a raw frame,
    # so write the processed frames out as an mp4
    h, w = processed_frames[0].shape[:2]
    # 5 fps is an arbitrary playback rate for the sampled frames
    writer = cv2.VideoWriter("output.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 5, (w, h))
    for f in processed_frames:
        writer.write(f)
    writer.release()
    return "output.mp4"

# Create a Gradio interface
interface = gr.Interface(fn=video_interface, inputs="video", outputs="video", title="Abandoned Object Detection")

if __name__ == "__main__":
    interface.launch()
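
To run the app locally, something like the following should work (the package list is inferred from the imports rather than specified in the commit; loading MiDaS via torch.hub additionally requires timm):

pip install ultralytics torch opencv-python numpy gradio timm
python app.py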